From 2dc8d7cf00504a1235ceed89123afef87be7d16f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Nov 2018 17:04:34 -0600 Subject: [PATCH 01/46] WIP: Starts adding mempool to context. Need to figure out a way to get myself out of the handle<->context fwd decl. issue. --- viennacl/ocl/context.hpp | 140 ++++++- viennacl/ocl/forwards.h | 7 +- viennacl/ocl/handle.hpp | 30 +- viennacl/ocl/mempool/bitlog.hpp | 92 +++++ viennacl/ocl/mempool/mempool.hpp | 542 +++++++++++++++++++++++++ viennacl/ocl/mempool/mempool_utils.hpp | 108 +++++ viennacl/tools/mempool.hpp | 382 +++++++++++++++++ 7 files changed, 1295 insertions(+), 6 deletions(-) create mode 100644 viennacl/ocl/mempool/bitlog.hpp create mode 100644 viennacl/ocl/mempool/mempool.hpp create mode 100644 viennacl/ocl/mempool/mempool_utils.hpp create mode 100644 viennacl/tools/mempool.hpp diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index b8b7f19f..24981c87 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -8,6 +8,7 @@ Portions of this software are copyright by UChicago Argonne, LLC. 
----------------- + ViennaCL - The Vienna Computing Library ----------------- @@ -43,10 +44,122 @@ #include "viennacl/ocl/command_queue.hpp" #include "viennacl/tools/sha1.hpp" #include "viennacl/tools/shared_ptr.hpp" +#include "viennacl/ocl/mempool/bitlog.hpp" +#include "viennacl/ocl/mempool/mempool_utils.hpp" +#include "viennacl/tools/shared_ptr.hpp" + + namespace viennacl { namespace ocl { + + // {{{ memory pool declaration + + class cl_allocator_base + { + protected: + tools::shared_ptr m_context; + cl_mem_flags m_flags; + public: + cl_allocator_base(tools::shared_ptr const&, cl_mem_flags); + + cl_allocator_base(cl_allocator_base const &src); + + virtual ~cl_allocator_base(); + + virtual cl_allocator_base *copy() const; + virtual bool is_deferred() const; + virtual cl_mem allocate(size_t); + + void free(cl_mem ); + }; + + class cl_deferred_allocator : public cl_allocator_base + { + public: + cl_deferred_allocator(tools::shared_ptr const&, + cl_mem_flags); + + cl_allocator_base *copy() const; + + bool is_deferred() const; + + cl_mem allocate(size_t ); + }; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + tools::shared_ptr const & m_queue; + + public: + // NOTE: Changed the declaration as viennacl comman=d queue does nt store + // the context + cl_immediate_allocator(tools::shared_ptr const&, + tools::shared_ptr const &, + cl_mem_flags); + + cl_immediate_allocator(cl_immediate_allocator const &); + + cl_allocator_base *copy() const; + + bool is_deferred() const; + + cl_mem allocate(size_t ); + }; + + template + class memory_pool : mempool::noncopyable + { + private: + + std::map> m_container; + + std::unique_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. 
+ unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + cl_mem get_from_allocator(size_t ); + cl_mem pop_block_from_bin(std::vector& , size_t ); + + public: + memory_pool(Allocator const&); + virtual ~memory_pool(); + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + static uint32_t bin_number(size_t ); + void set_trace(bool ); + static size_t alloc_size(uint32_t ); + + cl_mem allocate(size_t ); + void free(cl_mem , size_t ); + void free_held(); + void stop_holding(); + unsigned active_blocks(); + unsigned held_blocks(); + bool try_to_free_memory(); + + protected: + std::vector &get_bin(uint32_t ); + void inc_held_blocks(); + void dec_held_blocks(); + + virtual void start_holding_blocks(); + virtual void stop_holding_blocks(); + }; + + // }}} + + /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. * * This class was originally written before the OpenCL C++ bindings were standardized. 
@@ -64,6 +177,12 @@ public: pf_index_(0), current_queue_id_(0) { + allocators_[0] = new + cl_immediate_allocator(tools::shared_ptr(this), + tools::shared_ptr(&get_queue()), + CL_MEM_READ_WRITE); + mempools_[0] = new + memory_pool(*allocators_[0]); if (std::getenv("VIENNACL_CACHE_PATH")) cache_path_ = std::getenv("VIENNACL_CACHE_PATH"); else @@ -84,6 +203,8 @@ public: /** @brief Sets the maximum number of devices to be set up for the context */ void default_device_num(vcl_size_t new_num) { default_device_num_ = new_num; } + /** Creating a memory pool */ + ////////// get and set preferred device type ///////////////////// /** @brief Returns the default device type for the context */ cl_device_type default_device_type() @@ -197,13 +318,18 @@ public: * @param flags OpenCL flags for the buffer creation * @param size Size of the memory buffer in bytes * @param ptr Optional pointer to CPU memory, with which the OpenCL memory should be initialized + * @param use_mempool Optional boolean to create memory through the memory pool. * @return A plain OpenCL handle. Either assign it to a viennacl::ocl::handle directly, or make sure that you free to memory manually if you no longer need the allocated memory. 
*/ - cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const + cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; #endif + if(use_mempool){ + cl_mem mem = get_mempool()->allocate(size); + return mem; + } if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; @@ -279,6 +405,14 @@ public: return queues_[devices_[current_device_id_].id()][current_queue_id_]; } + //get current mempool + viennacl::ocl::memory_pool* const& get_mempool() const + { + typedef std::map< cl_device_id, viennacl::ocl::memory_pool* > MempoolContainer; + MempoolContainer::const_iterator it = mempools_.find(devices_[current_device_id_].id()); + return it->second; + } + viennacl::ocl::command_queue const & get_queue() const { typedef std::map< cl_device_id, std::vector > QueueContainer; @@ -765,6 +899,10 @@ private: std::string build_options_; vcl_size_t pf_index_; vcl_size_t current_queue_id_; + + // Memory pool + std::map< cl_device_id, cl_immediate_allocator*> allocators_; + std::map< cl_device_id, memory_pool*> mempools_; }; //context diff --git a/viennacl/ocl/forwards.h b/viennacl/ocl/forwards.h index a3603e91..56ba898f 100644 --- a/viennacl/ocl/forwards.h +++ b/viennacl/ocl/forwards.h @@ -25,6 +25,7 @@ #define VIENNACL_OCL_MAX_DEVICE_NUM 8 #include +#include namespace viennacl { @@ -41,15 +42,15 @@ namespace viennacl struct default_tag {}; + template + class handle; + class kernel; class device; class command_queue; class context; class program; - template - class handle; - template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 
8283f848..152220ac 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -45,6 +45,7 @@ namespace viennacl class handle_inc_dec_helper { typedef typename OCL_TYPE::ERROR_TEMPLATE_ARGUMENT_FOR_CLASS_INVALID ErrorType; + }; /** \cond */ @@ -63,6 +64,12 @@ namespace viennacl cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); } + + static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_mem & something) + { + cl_int err = clReleaseMemObject(something); + VIENNACL_ERR_CHECK(err); + } }; //cl_program: @@ -83,6 +90,9 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } + + static void dec_via_mempool(viennacl::ocl::context const & ctx, + cl_program& something){} }; //cl_kernel: @@ -103,6 +113,7 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } + static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_kernel& something){} }; //cl_command_queue: @@ -123,6 +134,7 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } + static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_command_queue& something){} }; //cl_context: @@ -143,6 +155,7 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } + static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_context& something){} }; /** \endcond */ @@ -150,9 +163,17 @@ namespace viennacl template class handle { + bool used_mempool = false; public: handle() : h_(0), p_context_(NULL) {} - handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) {} + handle(const OCL_TYPE & something, viennacl::ocl::context const & c, bool _u = false) : used_mempool(_u), h_(something), p_context_(&c) + { + if((typeid(OCL_TYPE) != typeid(cl_mem)) && used_mempool) + { + std::cerr << "[handle]: memory pool is only available for memory objects." 
<< std::endl; + throw std::exception(); + } + } handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } ~handle() { if (h_ != 0) dec(); } @@ -215,7 +236,12 @@ namespace viennacl /** @brief Manually increment the OpenCL reference count. Typically called automatically, but is necessary if user-supplied memory objects are wrapped. */ void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. */ - void dec() { handle_inc_dec_helper::dec(h_); } + void dec() { + if(used_mempool) + handle_inc_dec_helper::dec(h_); + else + handle_inc_dec_helper::dec_via_mempool(context(), h_); + } private: OCL_TYPE h_; viennacl::ocl::context const * p_context_; diff --git a/viennacl/ocl/mempool/bitlog.hpp b/viennacl/ocl/mempool/bitlog.hpp new file mode 100644 index 00000000..23dac7de --- /dev/null +++ b/viennacl/ocl/mempool/bitlog.hpp @@ -0,0 +1,92 @@ +// Base-2 logarithm bithack. +// +// Copyright (C) 2009 Andreas Kloeckner +// Copyright (C) Sean Eron Anderson (in the public domain) +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP + + +#include +#include + + +namespace mempool +{ + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + + + const char log_table_8[] = + { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + inline unsigned bitlog2_16(uint16_t v) + { + if (unsigned long t = v >> 8) + return 8+log_table_8[t]; + else + return log_table_8[v]; + } + + inline unsigned bitlog2_32(uint32_t v) + { + if (uint16_t t = v >> 16) + return 16+bitlog2_16(t); + else + return bitlog2_16(v); + } + + inline unsigned bitlog2(unsigned long v) + { +#if (ULONG_MAX != 4294967295) + if (uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else +#endif + return bitlog2_32(v); + } +} + + + + + +#endif diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp new file mode 100644 index 00000000..33fce00d --- /dev/null +++ 
b/viennacl/ocl/mempool/mempool.hpp @@ -0,0 +1,542 @@ +// Abstract memory pool implementation +// +// Copyright (C) 2009-17 Andreas Kloeckner +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ + +#ifndef VIENNACL_MEMPOOL_MEMPOOL_HPP_ +#define VIENNACL_MEMPOOL_MEMPOOL_HPP_ + + +#include +#include +#include +#include +#include +#include + +#include "viennacl/ocl/error.hpp" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +namespace viennacl +{ +namespace mempool +{ + + // {{{ Allocator + + class cl_allocator_base + { + protected: + tools::shared_ptr m_context; + cl_mem_flags m_flags; + + public: + // NOTE: pyopencl::context -> cl_context + // Q: Should I make this viennacl::context + cl_allocator_base(tools::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + { + std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " + "COPY_HOST_PTR flags" << std::endl; + throw viennacl::ocl::invalid_value(); + } + } + + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } + + virtual ~cl_allocator_base() + { } + + typedef cl_mem cl_mem; + typedef size_t size_t; + + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual cl_mem allocate(size_t s) = 0; + + void free(cl_mem p) + { + cl_int err = clReleaseMemObject(p); + VIENNACL_ERR_CHECK(err); + } + // NOTE: removed the function "try_release_blocks()" + }; + + class cl_deferred_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + + public: + cl_deferred_allocator(tools::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(ctx, flags) + { } + + cl_allocator_base *copy() const + { + return new cl_deferred_allocator(*this); + } + + bool is_deferred() const + { return true; } + + cl_mem allocate(size_t s) + { + return m_context->create_memory_without_smart_handle(m_flags, s, NULL); + } + }; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + tools::shared_ptr const & m_queue; + + public: + // NOTE: 
Changed the declaration as viennacl comman=d queue does nt store + // the context + cl_immediate_allocator(tools::shared_ptr const &ctx, + tools::shared_ptr const &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(tools::shared_ptr(ctx), flags), + m_queue(queue) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : super(src), m_queue(src.m_queue) + { } + + cl_allocator_base *copy() const + { + return new cl_immediate_allocator(*this); + } + + bool is_deferred() const + { return false; } + + cl_mem allocate(size_t s) + { + cl_mem ptr = + m_context->create_memory_without_smart_handle(m_flags, s, NULL); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. + unsigned zero = 0; + cl_int err = clEnqueueWriteBuffer( + m_queue->handle().get(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + ); + VIENNACL_ERR_CHECK(err); + + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. + + return ptr; + } + }; + + inline + cl_mem allocator_call(cl_allocator_base &alloc, size_t size) + { + cl_mem mem; + int try_count = 0; + while (try_count < 2) + { + try + { + mem = alloc.allocate(size); + break; + } + catch (viennacl::ocl::mem_object_allocation_failure &e) + { + if (++try_count == 2) + throw; + } + + //NOTE: There was a try_release blocks over here + // which I got rid off. Is that fine? + + // alloc.try_release_blocks(); + } + + try + { + // Note: PyOpenCL retains this buffer, however in ViennaCL, there + // doesn't seem to be any option to not retain it. + return mem; + } + catch (...) 
+ { + cl_int err = clReleaseMemObject(mem); + VIENNACL_ERR_CHECK(err); + throw; + } + } + // }}} + + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + + + + template + class memory_pool : mempool::noncopyable + { + private: + std::map> m_container; + + std::unique_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + throw std::runtime_error("Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. 
an allocator whose allocations can turn out to " + "be unavailable long after allocation."); + } + } + + virtual ~memory_pool() + { free_held(); } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static uint32_t bin_number(size_t size) + { + signed l = bitlog2(size); + size_t shifted = signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_t chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_t alloc_size(uint32_t bin) + { + uint32_t exponent = bin >> mantissa_bits; + uint32_t mantissa = bin & mantissa_mask; + + size_t ones = signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_t head = signed_left_shift( + (1< &get_bin(uint32_t bin_nr) + { + typename std::map>::iterator it = m_container.find(bin_nr); + if (it == m_container.end()) + { + auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, std::vector())); + assert(it_and_inserted.second); + return it_and_inserted.first->second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + cl_mem allocate(size_t size) + { + uint32_t bin_nr = bin_number(size); + std::vector &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_t alloc_sz = 
alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + std::cerr << "memory_pool::allocate " + "failed to free memory for allocation" << std::endl; + throw viennacl::ocl::mem_object_allocation_failure(); + + } + + void free(cl_mem p, size_t size) + { + --m_active_blocks; + uint32_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + for (std::map>::value_type &bin_pair: m_container) + { + std::vector &bin = bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + // free largest stuff first + for (std::map>::value_type &bin_pair: reverse(m_container)) + { + std::vector &bin = bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + 
dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + cl_mem get_from_allocator(size_t alloc_sz) + { + cl_mem result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + cl_mem pop_block_from_bin(std::vector &bin, size_t size) + { + cl_mem result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; + + + template + class pooled_allocation : public mempool::noncopyable + { + public: + typedef Pool pool_type; + typedef typename Pool::cl_mem cl_mem; + typedef typename Pool::size_t size_t; + + private: + tools::shared_ptr m_pool; + + cl_mem m_ptr; + size_t m_size; + bool m_valid; + + public: + pooled_allocation(tools::shared_ptr p, size_t size) + : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) + { } + + ~pooled_allocation() + { + if (m_valid) + free(); + } + + void free() + { + if (m_valid) + { + m_pool->free(m_ptr, m_size); + m_valid = false; + } + else + throw mempool::error( + "pooled_device_allocation::free", + CL_INVALID_VALUE + ); + } + + cl_mem ptr() const + { return m_ptr; } + + size_t size() const + { return m_size; } + }; +} +} + +#endif diff --git a/viennacl/ocl/mempool/mempool_utils.hpp b/viennacl/ocl/mempool/mempool_utils.hpp new file mode 100644 index 00000000..2e40be4e --- /dev/null +++ b/viennacl/ocl/mempool/mempool_utils.hpp @@ -0,0 +1,108 @@ +// Various odds and ends +// +// Copyright (C) 2009 Andreas Kloeckner +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission 
notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + +#ifndef VIENNACL_MEMPOOL_UTILS_HPP +#define VIENNACL_MEMPOOL_UTILS_HPP + +#include +#include + + + +namespace viennacl +{ +namespace mempool +{ + + // {{{ error + class error : public std::runtime_error + { + private: + std::string m_routine; + cl_int m_code; + + // This is here because clLinkProgram returns a program + // object *just* so that there is somewhere for it to + // stuff the linker logs. :/ + bool m_program_initialized; + cl_program m_program; + + public: + error(const char *routine, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c), + m_program_initialized(false), m_program(nullptr) + { } + + error(const char *routine, cl_program prg, cl_int c, + const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c), + m_program_initialized(true), m_program(prg) + { } + + virtual ~error() + { + if (m_program_initialized) + clReleaseProgram(m_program); + } + + const std::string &routine() const + { + return m_routine; + } + + cl_int code() const + { + return m_code; + } + + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + }; + + // }}} + + + // https://stackoverflow.com/a/44175911 + class noncopyable { + public: + noncopyable() = default; + ~noncopyable() = default; + + private: + noncopyable(const noncopyable&) = 
delete; + noncopyable& operator=(const noncopyable&) = delete; + }; + +} +} + +#endif + +// vim:foldmethod=marker diff --git a/viennacl/tools/mempool.hpp b/viennacl/tools/mempool.hpp new file mode 100644 index 00000000..48dbff04 --- /dev/null +++ b/viennacl/tools/mempool.hpp @@ -0,0 +1,382 @@ +// Abstract memory pool implementation +// +// Copyright (C) 2009-17 Andreas Kloeckner +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ + +#ifndef VIENNACL_MEMPOOL_MEMPOOL_HPP_ +#define VIENNACL_MEMPOOL_MEMPOOL_HPP_ + + +#include +#include +#include +#include +#include +#include +#include "viennacl/mempool/bitlog.hpp" +#include "viennacl/mempool/mempool_utils.hpp" + +namespace viennacl +{ +namespace mempool +{ + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + + + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + + + + template + class memory_pool : noncopyable + { + public: + typedef typename Allocator::pointer_type pointer_type; + typedef typename Allocator::size_type size_type; + + private: + typedef uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef std::map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + std::unique_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + throw std::runtime_error("Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. 
an allocator whose allocations can turn out to " + "be unavailable long after allocation."); + } + } + + virtual ~memory_pool() + { free_held(); } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = bitlog2(size); + size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = signed_left_shift( + (1<second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if 
(!e.is_out_of_memory()) + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + throw error( + "memory_pool::allocate", + CL_MEM_OBJECT_ALLOCATION_FAILURE, + "failed to free memory for allocation"); + } + + void free(pointer_type p, size_type size) + { + --m_active_blocks; + bin_nr_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + for (bin_pair_t &bin_pair: m_container) + { + bin_t &bin = bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + // free largest stuff first + for (bin_pair_t &bin_pair: reverse(m_container)) + { + bin_t &bin = bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + pointer_type get_from_allocator(size_type alloc_sz) + { + pointer_type result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type 
result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; + + + template + class pooled_allocation : public noncopyable + { + public: + typedef Pool pool_type; + typedef typename Pool::pointer_type pointer_type; + typedef typename Pool::size_type size_type; + + private: + std::shared_ptr m_pool; + + pointer_type m_ptr; + size_type m_size; + bool m_valid; + + public: + pooled_allocation(std::shared_ptr p, size_type size) + : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) + { } + + ~pooled_allocation() + { + if (m_valid) + free(); + } + + void free() + { + if (m_valid) + { + m_pool->free(m_ptr, m_size); + m_valid = false; + } + else + throw mempool::error( + "pooled_device_allocation::free", + CL_INVALID_VALUE + ); + } + + pointer_type ptr() const + { return m_ptr; } + + size_type size() const + { return m_size; } + }; +} +} + +#endif -- GitLab From 8c0af887689107846ecb7f3850025956b8bd7bac Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Nov 2018 19:46:37 -0600 Subject: [PATCH 02/46] added more helper functions for the memory pool. 
--- viennacl/backend/mem_handle.hpp | 28 ++++++++++++++++++++++++++++ viennacl/backend/memory.hpp | 4 ++-- viennacl/backend/opencl.hpp | 4 ++-- viennacl/detail/vector_def.hpp | 4 +++- viennacl/ocl/context.hpp | 8 ++++++++ viennacl/ocl/handle.hpp | 28 ++++++++++------------------ viennacl/vector.hpp | 8 ++++++-- 7 files changed, 59 insertions(+), 25 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 37c680ba..a2e22a46 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -24,6 +24,7 @@ #include #include +#include #include "viennacl/forwards.h" #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/backend/cpu_ram.hpp" @@ -149,6 +150,19 @@ public: } } + void used_mempool(bool u) + { + p_used_mempool = u; +#ifndef VIENNACL_WITH_OPENCL + std::cerr << "Memory pool allocation for non-OpenCL backends not supported yet.\n"; + throw std::exception(); +#endif +#ifdef VIENNACL_WITH_OPENCL + opencl_handle_.used_mempol(u); +#endif + } + + /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. */ bool operator==(mem_handle const & other) const { @@ -232,7 +246,21 @@ public: /** @brief Sets the size of the currently active buffer. Use with care! */ void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; } + ~mem_handle() + { + if(p_used_mempool) + { + +#ifdef VIENNACL_WITH_OPENCL + viennacl::ocl::context ctx = opencl_handle_.context(); + ctx.deallocate_memory_in_pool(opencl_handle_, raw_size()); +#endif + + } + } + private: + bool p_used_mempool = false; memory_types active_handle_; ram_handle_type ram_handle_; #ifdef VIENNACL_WITH_OPENCL diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 1b1c6c53..e1d85c4c 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -84,7 +84,7 @@ namespace backend * @param host_ptr Pointer to data which will be copied to the new array. 
Must point to at least 'size_in_bytes' bytes of data. * */ - inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) + inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL, bool use_mempool = false) { if (size_in_bytes > 0) { @@ -100,7 +100,7 @@ namespace backend #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: handle.opencl_handle().context(ctx.opencl_context()); - handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); + handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool); handle.raw_size(size_in_bytes); break; #endif diff --git a/viennacl/backend/opencl.hpp b/viennacl/backend/opencl.hpp index a8be55a7..66b5c22a 100644 --- a/viennacl/backend/opencl.hpp +++ b/viennacl/backend/opencl.hpp @@ -52,10 +52,10 @@ namespace opencl * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) * */ -inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL) +inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL, bool use_mempool = false) { //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl; - return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr)); + return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr), use_mempool); } /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context. 
diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 4624b762..18c9bd2c 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -144,7 +144,8 @@ public: explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); /** @brief Creates a vector and allocates the necessary memory */ - explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context()); + explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context(), + bool use_mempool = false); // CUDA or host memory: explicit vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start = 0, size_type stride = 1); @@ -203,6 +204,7 @@ public: self_type & operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, const vector_base, op_prod> & proxy); + ~vector_base(); ///////////////////////////// Matrix Vector interaction end /////////////////////////////////// diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 24981c87..5861a477 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -338,6 +338,14 @@ public: return mem; } + /// [KK]: TODOTODOTODOTODO + + void deallocate_memory_in_pool(viennacl::ocl::handle& mem_handle, size_t size) + { + get_mempool()->free(mem_handle.get(), size); + return; + } + /** @brief Creates a memory buffer within the context * diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 152220ac..7d4603b4 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -64,12 +64,6 @@ namespace viennacl cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); } - - static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_mem & something) - { - cl_int err = clReleaseMemObject(something); - VIENNACL_ERR_CHECK(err); - } }; //cl_program: @@ -90,9 +84,6 @@ namespace viennacl 
VIENNACL_ERR_CHECK(err); #endif } - - static void dec_via_mempool(viennacl::ocl::context const & ctx, - cl_program& something){} }; //cl_kernel: @@ -113,7 +104,6 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } - static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_kernel& something){} }; //cl_command_queue: @@ -134,7 +124,6 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } - static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_command_queue& something){} }; //cl_context: @@ -155,7 +144,6 @@ namespace viennacl VIENNACL_ERR_CHECK(err); #endif } - static void dec_via_mempool(viennacl::ocl::context const & ctx, cl_context& something){} }; /** \endcond */ @@ -163,12 +151,12 @@ namespace viennacl template class handle { - bool used_mempool = false; + bool used_mempool_ = false; public: handle() : h_(0), p_context_(NULL) {} - handle(const OCL_TYPE & something, viennacl::ocl::context const & c, bool _u = false) : used_mempool(_u), h_(something), p_context_(&c) + handle(const OCL_TYPE & something, viennacl::ocl::context const & c, bool u = false) : used_mempool_(u), h_(something), p_context_(&c) { - if((typeid(OCL_TYPE) != typeid(cl_mem)) && used_mempool) + if((typeid(OCL_TYPE) != typeid(cl_mem)) && used_mempool_) { std::cerr << "[handle]: memory pool is only available for memory objects." << std::endl; throw std::exception(); @@ -217,6 +205,7 @@ namespace viennacl return *p_context_; } void context(viennacl::ocl::context const & c) { p_context_ = &c; } + void used_mempool(bool u) { used_mempool_ = u; } /** @brief Swaps the OpenCL handle of two handle objects */ @@ -237,10 +226,13 @@ namespace viennacl void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. 
*/ void dec() { - if(used_mempool) + if(!used_mempool_) + { + // only handling the freeing of memory through this class if there + // is no mempool, otherwise for now handling it through the class + // vector base. or whatever equivalent. handle_inc_dec_helper::dec(h_); - else - handle_inc_dec_helper::dec_via_mempool(context(), h_); + } } private: OCL_TYPE h_; diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 69bf3d4c..4ca21944 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -256,16 +256,20 @@ vector_base::vector_base(viennacl::backend::mem_hand : size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} template -vector_base::vector_base(size_type vec_size, viennacl::context ctx) +vector_base::vector_base(size_type vec_size, viennacl::context ctx, bool use_mempool) : size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { if (size_ > 0) { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx); +#ifdef VIENNACL_WITH_OPENCL + elements_.used_mempool(use_mempool); +#endif + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL, use_mempool); clear(); } } + // CUDA or host memory: template vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) -- GitLab From b68b0684094ed01984fd2232e6b6f2416ccb8a22 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 11 Nov 2018 23:54:51 -0600 Subject: [PATCH 03/46] Compiles successfully. 
Still giving seg fault however, need to debug debug debug --- viennacl/backend/mem_handle.hpp | 2 +- viennacl/detail/vector_def.hpp | 1 - viennacl/ocl/context.hpp | 370 ++++++++++++++++++++++--- viennacl/ocl/mempool/bitlog.hpp | 6 +- viennacl/ocl/mempool/mempool.hpp | 21 -- viennacl/ocl/mempool/mempool_utils.hpp | 17 ++ 6 files changed, 350 insertions(+), 67 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index a2e22a46..33832ac4 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -158,7 +158,7 @@ public: throw std::exception(); #endif #ifdef VIENNACL_WITH_OPENCL - opencl_handle_.used_mempol(u); + opencl_handle_.used_mempool(u); #endif } diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 18c9bd2c..4cc98d46 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -204,7 +204,6 @@ public: self_type & operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, const vector_base, op_prod> & proxy); - ~vector_base(); ///////////////////////////// Matrix Vector interaction end /////////////////////////////////// diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 5861a477..27d44510 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -32,8 +32,15 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include +#include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" #include "viennacl/ocl/handle.hpp" @@ -43,7 +50,6 @@ #include "viennacl/ocl/platform.hpp" #include "viennacl/ocl/command_queue.hpp" #include "viennacl/tools/sha1.hpp" -#include "viennacl/tools/shared_ptr.hpp" #include "viennacl/ocl/mempool/bitlog.hpp" #include "viennacl/ocl/mempool/mempool_utils.hpp" #include "viennacl/tools/shared_ptr.hpp" @@ -68,26 +74,13 @@ namespace ocl virtual ~cl_allocator_base(); - virtual 
cl_allocator_base *copy() const; - virtual bool is_deferred() const; - virtual cl_mem allocate(size_t); + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual cl_mem allocate(size_t) = 0; void free(cl_mem ); }; - class cl_deferred_allocator : public cl_allocator_base - { - public: - cl_deferred_allocator(tools::shared_ptr const&, - cl_mem_flags); - - cl_allocator_base *copy() const; - - bool is_deferred() const; - - cl_mem allocate(size_t ); - }; - class cl_immediate_allocator : public cl_allocator_base { private: @@ -96,13 +89,24 @@ namespace ocl public: // NOTE: Changed the declaration as viennacl comman=d queue does nt store // the context - cl_immediate_allocator(tools::shared_ptr const&, - tools::shared_ptr const &, - cl_mem_flags); - - cl_immediate_allocator(cl_immediate_allocator const &); + // + + cl_immediate_allocator(tools::shared_ptr const &ctx, + tools::shared_ptr const &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : cl_allocator_base(tools::shared_ptr(ctx), flags), + m_queue(queue) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : cl_allocator_base(src), m_queue(src.m_queue) + { } + + cl_immediate_allocator *copy() const + { + return new cl_immediate_allocator(*this); + } - cl_allocator_base *copy() const; bool is_deferred() const; @@ -115,7 +119,9 @@ namespace ocl private: std::map> m_container; - + + // TODO -- [KK:] Looks like ViennaCL does not assume that the user has + // C++11 standard compatible compiler, should this be auto_ptr? 
std::unique_ptr m_allocator; // A held block is one that's been released by the application, but that @@ -128,33 +134,240 @@ namespace ocl bool m_stop_holding; int m_trace; - cl_mem get_from_allocator(size_t ); - cl_mem pop_block_from_bin(std::vector& , size_t ); + cl_mem get_from_allocator(size_t alloc_sz) + { + cl_mem result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + cl_mem pop_block_from_bin(std::vector &bin, size_t size) + { + cl_mem result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } public: - memory_pool(Allocator const&); + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + throw std::runtime_error("Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. an allocator whose allocations can turn out to " + "be unavailable long after allocation."); + } + } virtual ~memory_pool(); static const unsigned mantissa_bits = 2; static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; - static uint32_t bin_number(size_t ); - void set_trace(bool ); - static size_t alloc_size(uint32_t ); + static uint32_t bin_number(size_t size) + { + signed l = viennacl::mempool::bitlog2(size); + size_t shifted = viennacl::mempool::signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_t chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } - cl_mem allocate(size_t ); - void free(cl_mem , size_t ); - void free_held(); - void stop_holding(); - unsigned active_blocks(); - unsigned held_blocks(); - bool try_to_free_memory(); + static size_t 
alloc_size(uint32_t bin) + { + uint32_t exponent = bin >> mantissa_bits; + uint32_t mantissa = bin & mantissa_mask; + + size_t ones = viennacl::mempool::signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_t head = viennacl::mempool::signed_left_shift( + (1< &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_t alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + // KK: removed it., didn't seem useful to me. 
+ // m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (mempool::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + std::cerr << "memory_pool::allocate " + "failed to free memory for allocation" << std::endl; + throw viennacl::ocl::mem_object_allocation_failure(); + + } + + void free(cl_mem p, size_t size) + { + --m_active_blocks; + uint32_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + std::map>::reverse_iterator bin_pair = + m_container.rbegin(); + while (bin_pair != m_container.rend()) + { + std::vector &bin = bin_pair->second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + bool try_to_free_memory() + { + + std::map>::reverse_iterator bin_pair = + m_container.rbegin(); + // free largest stuff first + while (bin_pair != m_container.rend()) + { + std::vector &bin = bin_pair->second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } protected: - std::vector &get_bin(uint32_t ); - void inc_held_blocks(); - void dec_held_blocks(); + std::vector &get_bin(uint32_t bin_nr) + { + typename std::map>::iterator it = m_container.find(bin_nr); + if (it == 
m_container.end()) + { + auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, std::vector())); + assert(it_and_inserted.second); + return it_and_inserted.first->second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } - virtual void start_holding_blocks(); - virtual void stop_holding_blocks(); }; // }}} @@ -968,6 +1181,79 @@ inline void viennacl::ocl::kernel::set_work_size_defaults() } } +// {{{ viennacl::ocl::cl_allocator_base definition + +// CTOR +cl_allocator_base::cl_allocator_base(tools::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) +{ + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + { + std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " + "COPY_HOST_PTR flags" << std::endl; + throw viennacl::ocl::invalid_value(); + } +} + + +// Copy CTOR +cl_allocator_base::cl_allocator_base(cl_allocator_base const &src) +: m_context(src.m_context), m_flags(src.m_flags) +{ } + + +cl_allocator_base::~cl_allocator_base() +{ } + +void cl_allocator_base::free(cl_mem p) +{ + cl_int err = clReleaseMemObject(p); + VIENNACL_ERR_CHECK(err); +} + +// }}} + +// {{{ definitionof cl_immediate_allocator + + +bool cl_immediate_allocator::is_deferred() const +{ return false; } + +cl_mem cl_immediate_allocator::allocate(size_t s) +{ + cl_mem ptr = + m_context->create_memory_without_smart_handle(m_flags, s, NULL); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. 
+ unsigned zero = 0; + cl_int err = clEnqueueWriteBuffer( + m_queue->handle().get(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + ); + VIENNACL_ERR_CHECK(err); + + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. + + return ptr; +} + +// }}} + +template +memory_pool::~memory_pool() +{ free_held(); } + + } } diff --git a/viennacl/ocl/mempool/bitlog.hpp b/viennacl/ocl/mempool/bitlog.hpp index 23dac7de..5a0ba633 100644 --- a/viennacl/ocl/mempool/bitlog.hpp +++ b/viennacl/ocl/mempool/bitlog.hpp @@ -32,7 +32,8 @@ #include #include - +namespace viennacl +{ namespace mempool { /* from http://graphics.stanford.edu/~seander/bithacks.html */ @@ -83,7 +84,8 @@ namespace mempool #endif return bitlog2_32(v); } -} +} // namespace mempool +} // namespace viennacl diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 33fce00d..6e591c19 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -213,27 +213,6 @@ namespace mempool } // }}} - template - inline T signed_left_shift(T x, signed shift_amount) - { - if (shift_amount < 0) - return x >> -shift_amount; - else - return x << shift_amount; - } - - template - inline T signed_right_shift(T x, signed shift_amount) - { - if (shift_amount < 0) - return x << -shift_amount; - else - return x >> shift_amount; - } - - - - template class memory_pool : mempool::noncopyable { diff --git a/viennacl/ocl/mempool/mempool_utils.hpp b/viennacl/ocl/mempool/mempool_utils.hpp index 2e40be4e..639ee218 100644 --- a/viennacl/ocl/mempool/mempool_utils.hpp +++ b/viennacl/ocl/mempool/mempool_utils.hpp @@ -88,6 +88,23 @@ namespace mempool // }}} + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + 
template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } // https://stackoverflow.com/a/44175911 class noncopyable { -- GitLab From 28452cd1b68a16bece5629005d2a138bbe100e74 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 01:03:35 -0600 Subject: [PATCH 04/46] Code compiling + running for non-mempool allocations. --- viennacl/ocl/context.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 27d44510..6e19b11a 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -390,12 +390,6 @@ public: pf_index_(0), current_queue_id_(0) { - allocators_[0] = new - cl_immediate_allocator(tools::shared_ptr(this), - tools::shared_ptr(&get_queue()), - CL_MEM_READ_WRITE); - mempools_[0] = new - memory_pool(*allocators_[0]); if (std::getenv("VIENNACL_CACHE_PATH")) cache_path_ = std::getenv("VIENNACL_CACHE_PATH"); else @@ -593,6 +587,18 @@ public: viennacl::ocl::handle queue_handle(q, *this); queues_[dev].push_back(viennacl::ocl::command_queue(queue_handle)); queues_[dev].back().handle().inc(); + + if(queues_.find(dev) == queues_.end()) + { + // did not find a queue for the present device, need to allot an + // allocator. 
+ allocators_[dev] = new + cl_immediate_allocator(tools::shared_ptr(this), + tools::shared_ptr(&(queues_[dev][0])), + CL_MEM_READ_WRITE); + mempools_[dev] = new + memory_pool(*allocators_[dev]); + } } /** @brief Adds a queue for the given device to the context */ -- GitLab From 38f1ed9a4b54e3c1edb5934aa3729d3c5ef5f068 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 09:45:47 -0600 Subject: [PATCH 05/46] Got rid of the segmentation fault, need to do exhaustive testing --- viennacl/ocl/context.hpp | 219 +++++++++++++------------ viennacl/ocl/mempool/mempool_utils.hpp | 16 +- viennacl/vector.hpp | 2 +- 3 files changed, 132 insertions(+), 105 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 6e19b11a..73b7df4d 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -84,7 +84,7 @@ namespace ocl class cl_immediate_allocator : public cl_allocator_base { private: - tools::shared_ptr const & m_queue; + tools::shared_ptr m_queue; public: // NOTE: Changed the declaration as viennacl comman=d queue does nt store @@ -113,15 +113,24 @@ namespace ocl cl_mem allocate(size_t ); }; + + /// {{{ + template class memory_pool : mempool::noncopyable { + public: + typedef cl_mem pointer_type; + typedef size_t size_type; + private: + typedef uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef std::map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; - std::map> m_container; - - // TODO -- [KK:] Looks like ViennaCL does not assume that the user has - // C++11 standard compatible compiler, should this be auto_ptr? 
std::unique_ptr m_allocator; // A held block is one that's been released by the application, but that @@ -134,25 +143,6 @@ namespace ocl bool m_stop_holding; int m_trace; - cl_mem get_from_allocator(size_t alloc_sz) - { - cl_mem result = m_allocator->allocate(alloc_sz); - ++m_active_blocks; - - return result; - } - - cl_mem pop_block_from_bin(std::vector &bin, size_t size) - { - cl_mem result = bin.back(); - bin.pop_back(); - - dec_held_blocks(); - ++m_active_blocks; - - return result; - } - public: memory_pool(Allocator const &alloc=Allocator()) : m_allocator(alloc.copy()), @@ -161,24 +151,31 @@ namespace ocl { if (m_allocator->is_deferred()) { - throw std::runtime_error("Memory pools expect non-deferred " + std::cerr << "Memory pools expect non-deferred " "semantics from their allocators. You passed a deferred " "allocator, i.e. an allocator whose allocations can turn out to " - "be unavailable long after allocation."); + "be unavailable long after allocation.\n"; + throw std::exception(); } } - virtual ~memory_pool(); + + virtual ~memory_pool() + { free_held(); } + static const unsigned mantissa_bits = 2; static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; - static uint32_t bin_number(size_t size) + + static bin_nr_t bin_number(size_type size) { signed l = viennacl::mempool::bitlog2(size); - size_t shifted = viennacl::mempool::signed_right_shift(size, l-signed(mantissa_bits)); + size_type shifted = viennacl::mempool::signed_right_shift(size, + l-signed(mantissa_bits)); if (size && (shifted & (1 << mantissa_bits)) == 0) throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); - size_t chopped = shifted & mantissa_mask; + size_type chopped = shifted & mantissa_mask; return l << mantissa_bits | chopped; } + void set_trace(bool flag) { if (flag) @@ -187,17 +184,17 @@ namespace ocl --m_trace; } - static size_t alloc_size(uint32_t bin) + static size_type alloc_size(bin_nr_t bin) { - uint32_t exponent = bin >> mantissa_bits; - uint32_t mantissa = 
bin & mantissa_mask; + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; - size_t ones = viennacl::mempool::signed_left_shift(1, + size_type ones = viennacl::mempool::signed_left_shift(1, signed(exponent)-signed(mantissa_bits) ); if (ones) ones -= 1; - size_t head = viennacl::mempool::signed_left_shift( + size_type head = viennacl::mempool::signed_left_shift( (1< &bin = get_bin(bin_nr); + typename container_t::iterator it = m_container.find(bin_nr); + if (it == m_container.end()) + { + auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, bin_t())); + assert(it_and_inserted.second); + return it_and_inserted.first->second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); if (bin.size()) { @@ -219,7 +251,7 @@ namespace ocl return pop_block_from_bin(bin, size); } - size_t alloc_sz = alloc_size(bin_nr); + size_type alloc_sz = alloc_size(bin_nr); assert(bin_number(alloc_sz) == bin_nr); @@ -227,16 +259,14 @@ namespace ocl std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) + catch (viennacl::ocl::mem_object_allocation_failure &e) { - if (!e.is_out_of_memory()) throw; } if (m_trace) std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - // KK: removed it., didn't seem useful to me. 
+ // m_allocator->try_release_blocks(); if (bin.size()) return pop_block_from_bin(bin, size); @@ -247,23 +277,22 @@ namespace ocl while (try_to_free_memory()) { try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) + catch (viennacl::ocl::mem_object_allocation_failure &e) { - if (!e.is_out_of_memory()) throw; } } - std::cerr << "memory_pool::allocate " - "failed to free memory for allocation" << std::endl; + std::cerr << ( + "memory_pool::allocate " + "failed to free memory for allocation\n"); throw viennacl::ocl::mem_object_allocation_failure(); - } - void free(cl_mem p, size_t size) + void free(pointer_type p, size_type size) { --m_active_blocks; - uint32_t bin_nr = bin_number(size); + bin_nr_t bin_nr = bin_number(size); if (!m_stop_holding) { @@ -281,11 +310,9 @@ namespace ocl void free_held() { - std::map>::reverse_iterator bin_pair = - m_container.rbegin(); - while (bin_pair != m_container.rend()) + for (bin_pair_t &bin_pair: m_container) { - std::vector &bin = bin_pair->second; + bin_t &bin = bin_pair.second; while (bin.size()) { @@ -313,13 +340,10 @@ namespace ocl bool try_to_free_memory() { - - std::map>::reverse_iterator bin_pair = - m_container.rbegin(); // free largest stuff first - while (bin_pair != m_container.rend()) + for (bin_pair_t &bin_pair: viennacl::mempool::reverse(m_container)) { - std::vector &bin = bin_pair->second; + bin_t &bin = bin_pair.second; if (bin.size()) { @@ -334,42 +358,32 @@ namespace ocl return false; } - protected: - std::vector &get_bin(uint32_t bin_nr) - { - typename std::map>::iterator it = m_container.find(bin_nr); - if (it == m_container.end()) - { - auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, std::vector())); - assert(it_and_inserted.second); - return it_and_inserted.first->second; - } - else - return it->second; - } - void inc_held_blocks() + private: + pointer_type get_from_allocator(size_type alloc_sz) { - if (m_held_blocks == 0) - start_holding_blocks(); - ++m_held_blocks; - } 
+ pointer_type result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; - void dec_held_blocks() - { - --m_held_blocks; - if (m_held_blocks == 0) - stop_holding_blocks(); + return result; } - virtual void start_holding_blocks() - { } + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type result = bin.back(); + bin.pop_back(); - virtual void stop_holding_blocks() - { } + dec_held_blocks(); + ++m_active_blocks; + return result; + } }; + + // + //}}}} + // }}} @@ -530,6 +544,7 @@ public: */ cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { + std :: cout << "Zarchar Danda, Use mempool is " << use_mempool << "\n"; #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; #endif @@ -539,9 +554,11 @@ public: } if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; + std::cout << "Danda power 1\n"; cl_int err; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); VIENNACL_ERR_CHECK(err); + std::cout << "Danda power 2 " << err << "\n"; return mem; } @@ -588,22 +605,12 @@ public: queues_[dev].push_back(viennacl::ocl::command_queue(queue_handle)); queues_[dev].back().handle().inc(); - if(queues_.find(dev) == queues_.end()) - { - // did not find a queue for the present device, need to allot an - // allocator. 
- allocators_[dev] = new - cl_immediate_allocator(tools::shared_ptr(this), - tools::shared_ptr(&(queues_[dev][0])), - CL_MEM_READ_WRITE); - mempools_[dev] = new - memory_pool(*allocators_[dev]); - } } /** @brief Adds a queue for the given device to the context */ void add_queue(cl_device_id dev) { + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Adding new queue for device " << dev << " to context " << h_ << std::endl; #endif @@ -616,6 +623,19 @@ public: VIENNACL_ERR_CHECK(err); queues_[dev].push_back(viennacl::ocl::command_queue(temp)); + + // TODO: Need figure out why this is giving an error. + //if(queues_.find(dev) == queues_.end()) + //{ + // did not find a queue for the present device, need to allot an + // allocator. + allocators_[dev] = new + cl_immediate_allocator(tools::shared_ptr(this), + tools::shared_ptr(&(queues_[dev][0])), + CL_MEM_READ_WRITE); + mempools_[dev] = new + memory_pool(*allocators_[dev]); + //} } /** @brief Adds a queue for the given device to the context */ @@ -1255,11 +1275,6 @@ cl_mem cl_immediate_allocator::allocate(size_t s) // }}} -template -memory_pool::~memory_pool() -{ free_held(); } - - } } diff --git a/viennacl/ocl/mempool/mempool_utils.hpp b/viennacl/ocl/mempool/mempool_utils.hpp index 639ee218..de04ee31 100644 --- a/viennacl/ocl/mempool/mempool_utils.hpp +++ b/viennacl/ocl/mempool/mempool_utils.hpp @@ -30,8 +30,6 @@ #include #include - - namespace viennacl { namespace mempool @@ -106,6 +104,20 @@ namespace mempool return x >> shift_amount; } + // https://stackoverflow.com/a/28139075 + template + struct reversion_wrapper { T& iterable; }; + + template + auto begin (reversion_wrapper w) { return w.iterable.rbegin(); } + + template + auto end (reversion_wrapper w) { return w.iterable.rend(); } + + template + reversion_wrapper reverse (T&& iterable) { return { iterable }; } + + // https://stackoverflow.com/a/44175911 class noncopyable { public: diff --git a/viennacl/vector.hpp 
b/viennacl/vector.hpp index 4ca21944..22d2452d 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -970,7 +970,7 @@ public: */ explicit vector(size_type vec_size) : base_type(vec_size) {} - explicit vector(size_type vec_size, viennacl::context ctx) : base_type(vec_size, ctx) {} + explicit vector(size_type vec_size, viennacl::context ctx, bool use_mempool = false) : base_type(vec_size, ctx, use_mempool) {} explicit vector(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, size_type start = 0, size_type stride = 1) : base_type(ptr_to_mem, mem_type, vec_size, start, stride) {} -- GitLab From c05e4ce065494bb5cc4dfa2fba4d174f11ebfe6a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 13:58:25 -0600 Subject: [PATCH 06/46] destructor is a bit troublesome as of now --- viennacl/backend/mem_handle.hpp | 12 +++++++++--- viennacl/ocl/context.hpp | 24 +++++++++++++++--------- viennacl/ocl/handle.hpp | 13 +++++++++---- viennacl/vector.hpp | 2 +- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 33832ac4..54b8a6cf 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -94,7 +94,7 @@ public: typedef viennacl::tools::shared_ptr cuda_handle_type; /** @brief Default CTOR. No memory is allocated */ - mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} + mem_handle() : p_used_mempool(false), active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. 
*/ ram_handle_type & ram_handle() { return ram_handle_; } @@ -150,7 +150,13 @@ public: } } - void used_mempool(bool u) + + bool get_used_mempool(bool u) + { + return p_used_mempool; + } + + void set_used_mempool(bool u) { p_used_mempool = u; #ifndef VIENNACL_WITH_OPENCL @@ -260,7 +266,7 @@ public: } private: - bool p_used_mempool = false; + bool p_used_mempool; memory_types active_handle_; ram_handle_type ram_handle_; #ifdef VIENNACL_WITH_OPENCL diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 73b7df4d..58b4b4ea 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -60,7 +60,7 @@ namespace viennacl namespace ocl { - // {{{ memory pool declaration + // {{{ allocator class class cl_allocator_base { @@ -114,6 +114,8 @@ namespace ocl }; + // }}} + /// {{{ template @@ -160,7 +162,10 @@ namespace ocl } virtual ~memory_pool() - { free_held(); } + { + std :: cout << "Destructor of memory pool\n"; + free_held(); + } static const unsigned mantissa_bits = 2; static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; @@ -291,6 +296,8 @@ namespace ocl void free(pointer_type p, size_type size) { + std :: cout << "You didn't wanted me to deallocate and I am here.\n"; + --m_active_blocks; bin_nr_t bin_nr = bin_number(size); @@ -384,8 +391,6 @@ namespace ocl // //}}}} - // }}} - /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. 
* @@ -544,7 +549,6 @@ public: */ cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { - std :: cout << "Zarchar Danda, Use mempool is " << use_mempool << "\n"; #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; #endif @@ -554,11 +558,10 @@ public: } if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; - std::cout << "Danda power 1\n"; cl_int err; + std::cout << "[viennacl]: I am allocating a buffer of size " << size << "\n"; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); VIENNACL_ERR_CHECK(err); - std::cout << "Danda power 2 " << err << "\n"; return mem; } @@ -567,7 +570,6 @@ public: void deallocate_memory_in_pool(viennacl::ocl::handle& mem_handle, size_t size) { get_mempool()->free(mem_handle.get(), size); - return; } @@ -1234,6 +1236,7 @@ cl_allocator_base::~cl_allocator_base() void cl_allocator_base::free(cl_mem p) { + std :: cout << "[mempool]: Deallocating from mempool...\n"; cl_int err = clReleaseMemObject(p); VIENNACL_ERR_CHECK(err); } @@ -1242,15 +1245,18 @@ void cl_allocator_base::free(cl_mem p) // {{{ definitionof cl_immediate_allocator - bool cl_immediate_allocator::is_deferred() const { return false; } cl_mem cl_immediate_allocator::allocate(size_t s) { + + std :: cout << "[mempool]: requesting a buffer of size " << s << "\n"; cl_mem ptr = m_context->create_memory_without_smart_handle(m_flags, s, NULL); + std :: cout << "[mempool]: allocated a buffer of size " << s << "\n"; + // Make sure the buffer gets allocated right here and right now. // This looks (and is) expensive. 
But immediate allocators // have their main use in memory pools, whose basic assumption diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 7d4603b4..9c673055 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -61,6 +61,7 @@ namespace viennacl static void dec(cl_mem & something) { + std :: cout << "[viennacl]: Deallocating from handle...\n"; cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); } @@ -151,9 +152,8 @@ namespace viennacl template class handle { - bool used_mempool_ = false; public: - handle() : h_(0), p_context_(NULL) {} + handle() : used_mempool_(false), h_(0), p_context_(NULL) {} handle(const OCL_TYPE & something, viennacl::ocl::context const & c, bool u = false) : used_mempool_(u), h_(something), p_context_(&c) { if((typeid(OCL_TYPE) != typeid(cl_mem)) && used_mempool_) @@ -162,7 +162,8 @@ namespace viennacl throw std::exception(); } } - handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } + handle(const handle & other) : used_mempool_(other.used_mempool_), h_(other.h_), p_context_(other.p_context_) { + if (h_ != 0) inc(); } ~handle() { if (h_ != 0) dec(); } /** @brief Copies the OpenCL handle from the provided handle. Does not take ownership like e.g. std::auto_ptr<>, so both handle objects are valid (more like shared_ptr). */ @@ -218,7 +219,6 @@ namespace viennacl viennacl::ocl::context const * tmp2 = other.p_context_; other.p_context_ = this->p_context_; this->p_context_ = tmp2; - return *this; } @@ -233,8 +233,13 @@ namespace viennacl // vector base. or whatever equivalent. 
handle_inc_dec_helper::dec(h_); } + else + { + std::cout << "Saved you a deallocation\n"; + } } private: + bool used_mempool_; OCL_TYPE h_; viennacl::ocl::context const * p_context_; }; diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 22d2452d..9e01222e 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -262,7 +262,7 @@ vector_base::vector_base(size_type vec_size, viennac if (size_ > 0) { #ifdef VIENNACL_WITH_OPENCL - elements_.used_mempool(use_mempool); + elements_.set_used_mempool(use_mempool); #endif viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL, use_mempool); clear(); -- GitLab From fe1a7baa9e01911f19b74792de6a1af8d8cbdfc3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 16:19:10 -0600 Subject: [PATCH 07/46] memory pool implementation works, need to test for petsc --- viennacl/ocl/context.hpp | 40 +++++++++++++++++++--------------------- viennacl/ocl/handle.hpp | 4 ---- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 58b4b4ea..dbfeb8a2 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -65,10 +65,10 @@ namespace ocl class cl_allocator_base { protected: - tools::shared_ptr m_context; + viennacl::ocl::context* m_context; cl_mem_flags m_flags; public: - cl_allocator_base(tools::shared_ptr const&, cl_mem_flags); + cl_allocator_base(viennacl::ocl::context* const&, cl_mem_flags); cl_allocator_base(cl_allocator_base const &src); @@ -84,17 +84,17 @@ namespace ocl class cl_immediate_allocator : public cl_allocator_base { private: - tools::shared_ptr m_queue; + viennacl::ocl::command_queue* m_queue; public: // NOTE: Changed the declaration as viennacl comman=d queue does nt store // the context // - cl_immediate_allocator(tools::shared_ptr const &ctx, - tools::shared_ptr const &queue, + cl_immediate_allocator(viennacl::ocl::context* const &ctx, + viennacl::ocl::command_queue* const &queue, 
cl_mem_flags flags=CL_MEM_READ_WRITE) - : cl_allocator_base(tools::shared_ptr(ctx), flags), + : cl_allocator_base(ctx, flags), m_queue(queue) { } @@ -163,7 +163,6 @@ namespace ocl virtual ~memory_pool() { - std :: cout << "Destructor of memory pool\n"; free_held(); } @@ -296,8 +295,6 @@ namespace ocl void free(pointer_type p, size_type size) { - std :: cout << "You didn't wanted me to deallocate and I am here.\n"; - --m_active_blocks; bin_nr_t bin_nr = bin_number(size); @@ -631,12 +628,12 @@ public: //{ // did not find a queue for the present device, need to allot an // allocator. - allocators_[dev] = new - cl_immediate_allocator(tools::shared_ptr(this), - tools::shared_ptr(&(queues_[dev][0])), - CL_MEM_READ_WRITE); - mempools_[dev] = new - memory_pool(*allocators_[dev]); + allocators_[dev] = tools::shared_ptr(new + cl_immediate_allocator(this, + &(queues_[dev][0]), + CL_MEM_READ_WRITE)); + mempools_[dev] = tools::shared_ptr> (new + memory_pool(*allocators_[dev])); //} } @@ -655,9 +652,9 @@ public: } //get current mempool - viennacl::ocl::memory_pool* const& get_mempool() const + tools::shared_ptr> const& get_mempool() const { - typedef std::map< cl_device_id, viennacl::ocl::memory_pool* > MempoolContainer; + typedef std::map< cl_device_id, tools::shared_ptr> > MempoolContainer; MempoolContainer::const_iterator it = mempools_.find(devices_[current_device_id_].id()); return it->second; } @@ -1150,8 +1147,8 @@ private: vcl_size_t current_queue_id_; // Memory pool - std::map< cl_device_id, cl_immediate_allocator*> allocators_; - std::map< cl_device_id, memory_pool*> mempools_; + std::map< cl_device_id, tools::shared_ptr> allocators_; + std::map< cl_device_id, tools::shared_ptr>> mempools_; }; //context @@ -1212,7 +1209,7 @@ inline void viennacl::ocl::kernel::set_work_size_defaults() // {{{ viennacl::ocl::cl_allocator_base definition // CTOR -cl_allocator_base::cl_allocator_base(tools::shared_ptr const &ctx, +cl_allocator_base::cl_allocator_base(viennacl::ocl::context* 
const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { @@ -1236,9 +1233,10 @@ cl_allocator_base::~cl_allocator_base() void cl_allocator_base::free(cl_mem p) { - std :: cout << "[mempool]: Deallocating from mempool...\n"; + std :: cout << "[mempool]: came to deallocate\n"; cl_int err = clReleaseMemObject(p); VIENNACL_ERR_CHECK(err); + std :: cout << "[mempool]: done with deallocation\n"; } // }}} diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 9c673055..ac02869a 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -233,10 +233,6 @@ namespace viennacl // vector base. or whatever equivalent. handle_inc_dec_helper::dec(h_); } - else - { - std::cout << "Saved you a deallocation\n"; - } } private: bool used_mempool_; -- GitLab From cb2dcaba8eab1e0abe55342c5677802af8e51761 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 16:29:34 -0600 Subject: [PATCH 08/46] enforces linear operations to use memory pool --- viennacl/linalg/opencl/vector_operations.hpp | 35 ++++++++++++-------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 2b1c24ca..b7602425 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -42,6 +42,15 @@ #include "viennacl/traits/handle.hpp" #include "viennacl/traits/stride.hpp" +#ifdef VIENNACL_WITH_OPENCL +#define USE_MEMPOOL true +#endif + +#ifndef VIENNACL_WITH_OPENCL +#define USE_MEMPOOL false +#endif + + namespace viennacl { namespace linalg @@ -531,7 +540,7 @@ void inner_prod_impl(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1), USE_MEMPOOL); temp.resize(work_groups, 
ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -592,7 +601,7 @@ void inner_prod_impl(vector_base const & x, viennacl::ocl::kernel & inner_prod_kernel_8 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod::program_name(), "inner_prod8"); vcl_size_t work_groups = inner_prod_kernel_8.global_work_size(0) / inner_prod_kernel_8.local_work_size(0); - viennacl::vector temp(8 * work_groups, viennacl::traits::context(x)); + viennacl::vector temp(8 * work_groups, viennacl::traits::context(x), USE_MEMPOOL); vcl_size_t current_index = 0; while (current_index < vec_tuple.const_size()) @@ -762,7 +771,7 @@ void inner_prod_cpu(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1), USE_MEMPOOL); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -829,7 +838,7 @@ void norm_1_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -858,7 +867,7 @@ void norm_1_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -893,7 +902,7 @@ void norm_2_impl(vector_base const 
& vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -922,7 +931,7 @@ void norm_2_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -957,7 +966,7 @@ void norm_inf_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -986,7 +995,7 @@ void norm_inf_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -1059,7 +1068,7 @@ void max_impl(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x)); + viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "max_kernel"); @@ -1095,7 +1104,7 @@ void max_cpu(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); 
vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x)); + viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "max_kernel"); @@ -1138,7 +1147,7 @@ void min_impl(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x)); + viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "min_kernel"); @@ -1174,7 +1183,7 @@ void min_cpu(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x)); + viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "min_kernel"); -- GitLab From 0e658c83b412698ef6022ca1c1fc1b08e51f4f2a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 12 Nov 2018 16:42:39 -0600 Subject: [PATCH 09/46] wraps the debug statements in VIENNACL_DEBUG_ALL --- viennacl/ocl/context.hpp | 9 ++++++++- viennacl/ocl/handle.hpp | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index dbfeb8a2..1252bfc8 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -556,7 +556,9 @@ public: if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; +#ifdef VIENNACL_DEBUG_ALL std::cout << "[viennacl]: I am allocating a buffer of size " << size << "\n"; +#endif cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); VIENNACL_ERR_CHECK(err); return mem; @@ -1233,10 +1235,11 @@ cl_allocator_base::~cl_allocator_base() 
void cl_allocator_base::free(cl_mem p) { - std :: cout << "[mempool]: came to deallocate\n"; cl_int err = clReleaseMemObject(p); VIENNACL_ERR_CHECK(err); +#ifdef VIENNACL_DEBUG_ALL std :: cout << "[mempool]: done with deallocation\n"; +#endif } // }}} @@ -1249,11 +1252,15 @@ bool cl_immediate_allocator::is_deferred() const cl_mem cl_immediate_allocator::allocate(size_t s) { +#ifdef VIENNACL_DEBUG_ALL std :: cout << "[mempool]: requesting a buffer of size " << s << "\n"; +#endif cl_mem ptr = m_context->create_memory_without_smart_handle(m_flags, s, NULL); +#ifdef VIENNACL_DEBUG_ALL std :: cout << "[mempool]: allocated a buffer of size " << s << "\n"; +#endif // Make sure the buffer gets allocated right here and right now. // This looks (and is) expensive. But immediate allocators diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index ac02869a..631f58d3 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -61,7 +61,9 @@ namespace viennacl static void dec(cl_mem & something) { +#ifdef VIENNACL_DEBUG_ALL std :: cout << "[viennacl]: Deallocating from handle...\n"; +#endif cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); } -- GitLab From 89c5848013c87553cac6ba57e585fc4c8e262504 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 13 Nov 2018 19:45:49 -0600 Subject: [PATCH 10/46] confirm run on petsc, still some issues with firedrake --- viennacl/ocl/context.hpp | 91 +++--- viennacl/ocl/mempool/mempool.hpp | 521 ------------------------------- 2 files changed, 36 insertions(+), 576 deletions(-) delete mode 100644 viennacl/ocl/mempool/mempool.hpp diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 1252bfc8..e77f0516 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -68,17 +68,43 @@ namespace ocl viennacl::ocl::context* m_context; cl_mem_flags m_flags; public: - cl_allocator_base(viennacl::ocl::context* const&, cl_mem_flags); - cl_allocator_base(cl_allocator_base const &src); - 
virtual ~cl_allocator_base(); +// CTOR +cl_allocator_base(viennacl::ocl::context* const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) +{ + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + { + std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " + "COPY_HOST_PTR flags" << std::endl; + throw viennacl::ocl::invalid_value(); + } +} + + +// Copy CTOR +cl_allocator_base(cl_allocator_base const &src) +: m_context(src.m_context), m_flags(src.m_flags) +{ } + + +~cl_allocator_base() +{ } + +void free(cl_mem p) +{ + cl_int err = clReleaseMemObject(p); + VIENNACL_ERR_CHECK(err); +#ifdef VIENNACL_DEBUG_ALL + std :: cout << "[mempool]: done with deallocation\n"; +#endif +} virtual cl_allocator_base *copy() const = 0; virtual bool is_deferred() const = 0; virtual cl_mem allocate(size_t) = 0; - - void free(cl_mem ); }; class cl_immediate_allocator : public cl_allocator_base @@ -107,10 +133,13 @@ namespace ocl return new cl_immediate_allocator(*this); } + inline cl_mem allocate(size_t s); + + - bool is_deferred() const; + bool is_deferred() const + { return false; } - cl_mem allocate(size_t ); }; @@ -556,9 +585,6 @@ public: if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; -#ifdef VIENNACL_DEBUG_ALL - std::cout << "[viennacl]: I am allocating a buffer of size " << size << "\n"; -#endif cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); VIENNACL_ERR_CHECK(err); return mem; @@ -1208,60 +1234,15 @@ inline void viennacl::ocl::kernel::set_work_size_defaults() } } -// {{{ viennacl::ocl::cl_allocator_base definition - -// CTOR -cl_allocator_base::cl_allocator_base(viennacl::ocl::context* const &ctx, - cl_mem_flags flags=CL_MEM_READ_WRITE) - : m_context(ctx), m_flags(flags) -{ - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - { - std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " - "COPY_HOST_PTR flags" << std::endl; - throw viennacl::ocl::invalid_value(); - } -} - - 
-// Copy CTOR -cl_allocator_base::cl_allocator_base(cl_allocator_base const &src) -: m_context(src.m_context), m_flags(src.m_flags) -{ } - - -cl_allocator_base::~cl_allocator_base() -{ } - -void cl_allocator_base::free(cl_mem p) -{ - cl_int err = clReleaseMemObject(p); - VIENNACL_ERR_CHECK(err); -#ifdef VIENNACL_DEBUG_ALL - std :: cout << "[mempool]: done with deallocation\n"; -#endif -} - -// }}} - // {{{ definitionof cl_immediate_allocator -bool cl_immediate_allocator::is_deferred() const -{ return false; } cl_mem cl_immediate_allocator::allocate(size_t s) { -#ifdef VIENNACL_DEBUG_ALL - std :: cout << "[mempool]: requesting a buffer of size " << s << "\n"; -#endif cl_mem ptr = m_context->create_memory_without_smart_handle(m_flags, s, NULL); -#ifdef VIENNACL_DEBUG_ALL - std :: cout << "[mempool]: allocated a buffer of size " << s << "\n"; -#endif - // Make sure the buffer gets allocated right here and right now. // This looks (and is) expensive. But immediate allocators // have their main use in memory pools, whose basic assumption diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp deleted file mode 100644 index 6e591c19..00000000 --- a/viennacl/ocl/mempool/mempool.hpp +++ /dev/null @@ -1,521 +0,0 @@ -// Abstract memory pool implementation -// -// Copyright (C) 2009-17 Andreas Kloeckner -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - - -#ifndef VIENNACL_MEMPOOL_MEMPOOL_HPP_ -#define VIENNACL_MEMPOOL_MEMPOOL_HPP_ - - -#include -#include -#include -#include -#include -#include - -#include "viennacl/ocl/error.hpp" - -#ifdef __APPLE__ -#include -#else -#include -#endif - -namespace viennacl -{ -namespace mempool -{ - - // {{{ Allocator - - class cl_allocator_base - { - protected: - tools::shared_ptr m_context; - cl_mem_flags m_flags; - - public: - // NOTE: pyopencl::context -> cl_context - // Q: Should I make this viennacl::context - cl_allocator_base(tools::shared_ptr const &ctx, - cl_mem_flags flags=CL_MEM_READ_WRITE) - : m_context(ctx), m_flags(flags) - { - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - { - std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " - "COPY_HOST_PTR flags" << std::endl; - throw viennacl::ocl::invalid_value(); - } - } - - cl_allocator_base(cl_allocator_base const &src) - : m_context(src.m_context), m_flags(src.m_flags) - { } - - virtual ~cl_allocator_base() - { } - - typedef cl_mem cl_mem; - typedef size_t size_t; - - virtual cl_allocator_base *copy() const = 0; - virtual bool is_deferred() const = 0; - virtual cl_mem allocate(size_t s) = 0; - - void free(cl_mem p) - { - cl_int err = clReleaseMemObject(p); - VIENNACL_ERR_CHECK(err); - } - // NOTE: removed the function "try_release_blocks()" - }; - - class cl_deferred_allocator : public cl_allocator_base - { - private: - typedef cl_allocator_base super; - - public: - cl_deferred_allocator(tools::shared_ptr const 
&ctx, - cl_mem_flags flags=CL_MEM_READ_WRITE) - : super(ctx, flags) - { } - - cl_allocator_base *copy() const - { - return new cl_deferred_allocator(*this); - } - - bool is_deferred() const - { return true; } - - cl_mem allocate(size_t s) - { - return m_context->create_memory_without_smart_handle(m_flags, s, NULL); - } - }; - - class cl_immediate_allocator : public cl_allocator_base - { - private: - typedef cl_allocator_base super; - tools::shared_ptr const & m_queue; - - public: - // NOTE: Changed the declaration as viennacl comman=d queue does nt store - // the context - cl_immediate_allocator(tools::shared_ptr const &ctx, - tools::shared_ptr const &queue, - cl_mem_flags flags=CL_MEM_READ_WRITE) - : super(tools::shared_ptr(ctx), flags), - m_queue(queue) - { } - - cl_immediate_allocator(cl_immediate_allocator const &src) - : super(src), m_queue(src.m_queue) - { } - - cl_allocator_base *copy() const - { - return new cl_immediate_allocator(*this); - } - - bool is_deferred() const - { return false; } - - cl_mem allocate(size_t s) - { - cl_mem ptr = - m_context->create_memory_without_smart_handle(m_flags, s, NULL); - - // Make sure the buffer gets allocated right here and right now. - // This looks (and is) expensive. But immediate allocators - // have their main use in memory pools, whose basic assumption - // is that allocation is too expensive anyway--but they rely - // on exact 'out-of-memory' information. - unsigned zero = 0; - cl_int err = clEnqueueWriteBuffer( - m_queue->handle().get(), - ptr, - /* is blocking */ CL_FALSE, - 0, std::min(s, sizeof(zero)), &zero, - 0, NULL, NULL - ); - VIENNACL_ERR_CHECK(err); - - // No need to wait for completion here. clWaitForEvents (e.g.) - // cannot return mem object allocation failures. This implies that - // the buffer is faulted onto the device on enqueue. 
- - return ptr; - } - }; - - inline - cl_mem allocator_call(cl_allocator_base &alloc, size_t size) - { - cl_mem mem; - int try_count = 0; - while (try_count < 2) - { - try - { - mem = alloc.allocate(size); - break; - } - catch (viennacl::ocl::mem_object_allocation_failure &e) - { - if (++try_count == 2) - throw; - } - - //NOTE: There was a try_release blocks over here - // which I got rid off. Is that fine? - - // alloc.try_release_blocks(); - } - - try - { - // Note: PyOpenCL retains this buffer, however in ViennaCL, there - // doesn't seem to be any option to not retain it. - return mem; - } - catch (...) - { - cl_int err = clReleaseMemObject(mem); - VIENNACL_ERR_CHECK(err); - throw; - } - } - // }}} - - template - class memory_pool : mempool::noncopyable - { - private: - std::map> m_container; - - std::unique_ptr m_allocator; - - // A held block is one that's been released by the application, but that - // we are keeping around to dish out again. - unsigned m_held_blocks; - - // An active block is one that is in use by the application. - unsigned m_active_blocks; - - bool m_stop_holding; - int m_trace; - - public: - memory_pool(Allocator const &alloc=Allocator()) - : m_allocator(alloc.copy()), - m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), - m_trace(false) - { - if (m_allocator->is_deferred()) - { - throw std::runtime_error("Memory pools expect non-deferred " - "semantics from their allocators. You passed a deferred " - "allocator, i.e. 
an allocator whose allocations can turn out to " - "be unavailable long after allocation."); - } - } - - virtual ~memory_pool() - { free_held(); } - - static const unsigned mantissa_bits = 2; - static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; - - static uint32_t bin_number(size_t size) - { - signed l = bitlog2(size); - size_t shifted = signed_right_shift(size, l-signed(mantissa_bits)); - if (size && (shifted & (1 << mantissa_bits)) == 0) - throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); - size_t chopped = shifted & mantissa_mask; - return l << mantissa_bits | chopped; - } - - void set_trace(bool flag) - { - if (flag) - ++m_trace; - else - --m_trace; - } - - static size_t alloc_size(uint32_t bin) - { - uint32_t exponent = bin >> mantissa_bits; - uint32_t mantissa = bin & mantissa_mask; - - size_t ones = signed_left_shift(1, - signed(exponent)-signed(mantissa_bits) - ); - if (ones) ones -= 1; - - size_t head = signed_left_shift( - (1< &get_bin(uint32_t bin_nr) - { - typename std::map>::iterator it = m_container.find(bin_nr); - if (it == m_container.end()) - { - auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, std::vector())); - assert(it_and_inserted.second); - return it_and_inserted.first->second; - } - else - return it->second; - } - - void inc_held_blocks() - { - if (m_held_blocks == 0) - start_holding_blocks(); - ++m_held_blocks; - } - - void dec_held_blocks() - { - --m_held_blocks; - if (m_held_blocks == 0) - stop_holding_blocks(); - } - - virtual void start_holding_blocks() - { } - - virtual void stop_holding_blocks() - { } - - public: - cl_mem allocate(size_t size) - { - uint32_t bin_nr = bin_number(size); - std::vector &bin = get_bin(bin_nr); - - if (bin.size()) - { - if (m_trace) - std::cout - << "[pool] allocation of size " << size << " served from bin " << bin_nr - << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); - } - - size_t alloc_sz = 
alloc_size(bin_nr); - - assert(bin_number(alloc_sz) == bin_nr); - - if (m_trace) - std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; - - try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) - { - if (!e.is_out_of_memory()) - throw; - } - - if (m_trace) - std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - m_allocator->try_release_blocks(); - if (bin.size()) - return pop_block_from_bin(bin, size); - - if (m_trace) - std::cout << "[pool] allocation still OOM after GC" << std::endl; - - while (try_to_free_memory()) - { - try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) - { - if (!e.is_out_of_memory()) - throw; - } - } - - std::cerr << "memory_pool::allocate " - "failed to free memory for allocation" << std::endl; - throw viennacl::ocl::mem_object_allocation_failure(); - - } - - void free(cl_mem p, size_t size) - { - --m_active_blocks; - uint32_t bin_nr = bin_number(size); - - if (!m_stop_holding) - { - inc_held_blocks(); - get_bin(bin_nr).push_back(p); - - if (m_trace) - std::cout << "[pool] block of size " << size << " returned to bin " - << bin_nr << " which now contains " << get_bin(bin_nr).size() - << " entries" << std::endl; - } - else - m_allocator->free(p); - } - - void free_held() - { - for (std::map>::value_type &bin_pair: m_container) - { - std::vector &bin = bin_pair.second; - - while (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - dec_held_blocks(); - } - } - - assert(m_held_blocks == 0); - } - - void stop_holding() - { - m_stop_holding = true; - free_held(); - } - - unsigned active_blocks() - { return m_active_blocks; } - - unsigned held_blocks() - { return m_held_blocks; } - - bool try_to_free_memory() - { - // free largest stuff first - for (std::map>::value_type &bin_pair: reverse(m_container)) - { - std::vector &bin = bin_pair.second; - - if (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - 
dec_held_blocks(); - - return true; - } - } - - return false; - } - - private: - cl_mem get_from_allocator(size_t alloc_sz) - { - cl_mem result = m_allocator->allocate(alloc_sz); - ++m_active_blocks; - - return result; - } - - cl_mem pop_block_from_bin(std::vector &bin, size_t size) - { - cl_mem result = bin.back(); - bin.pop_back(); - - dec_held_blocks(); - ++m_active_blocks; - - return result; - } - }; - - - template - class pooled_allocation : public mempool::noncopyable - { - public: - typedef Pool pool_type; - typedef typename Pool::cl_mem cl_mem; - typedef typename Pool::size_t size_t; - - private: - tools::shared_ptr m_pool; - - cl_mem m_ptr; - size_t m_size; - bool m_valid; - - public: - pooled_allocation(tools::shared_ptr p, size_t size) - : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) - { } - - ~pooled_allocation() - { - if (m_valid) - free(); - } - - void free() - { - if (m_valid) - { - m_pool->free(m_ptr, m_size); - m_valid = false; - } - else - throw mempool::error( - "pooled_device_allocation::free", - CL_INVALID_VALUE - ); - } - - cl_mem ptr() const - { return m_ptr; } - - size_t size() const - { return m_size; } - }; -} -} - -#endif -- GitLab From a875a1a25d650e2580a2120d45ac6d41dd07a7ff Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 15 Nov 2018 14:12:51 -0600 Subject: [PATCH 11/46] some more minor changes --- viennacl/ocl/context.hpp | 69 ++++--- viennacl/tools/mempool.hpp | 382 ------------------------------------- 2 files changed, 32 insertions(+), 419 deletions(-) delete mode 100644 viennacl/tools/mempool.hpp diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index e77f0516..4cb936f6 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -67,44 +67,43 @@ namespace ocl protected: viennacl::ocl::context* m_context; cl_mem_flags m_flags; - public: - -// CTOR -cl_allocator_base(viennacl::ocl::context* const &ctx, - cl_mem_flags flags=CL_MEM_READ_WRITE) - : m_context(ctx), m_flags(flags) 
-{ - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - { - std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " - "COPY_HOST_PTR flags" << std::endl; - throw viennacl::ocl::invalid_value(); - } -} + public: + // CTOR + cl_allocator_base(viennacl::ocl::context* const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + { + std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " + "COPY_HOST_PTR flags" << std::endl; + throw viennacl::ocl::invalid_value(); + } + } -// Copy CTOR -cl_allocator_base(cl_allocator_base const &src) -: m_context(src.m_context), m_flags(src.m_flags) -{ } + // Copy CTOR + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } -~cl_allocator_base() -{ } + ~cl_allocator_base() + { } -void free(cl_mem p) -{ - cl_int err = clReleaseMemObject(p); - VIENNACL_ERR_CHECK(err); + void free(cl_mem p) + { + cl_int err = clReleaseMemObject(p); + VIENNACL_ERR_CHECK(err); #ifdef VIENNACL_DEBUG_ALL - std :: cout << "[mempool]: done with deallocation\n"; + std :: cout << "[mempool]: done with deallocation\n"; #endif -} + } - virtual cl_allocator_base *copy() const = 0; - virtual bool is_deferred() const = 0; - virtual cl_mem allocate(size_t) = 0; + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual cl_mem allocate(size_t) = 0; }; class cl_immediate_allocator : public cl_allocator_base @@ -134,12 +133,10 @@ void free(cl_mem p) } inline cl_mem allocate(size_t s); + bool is_deferred() const + { return false; } - - - bool is_deferred() const - { return false; } - + virtual ~cl_immediate_allocator() {} }; @@ -575,13 +572,11 @@ public: */ cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { -#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) - std::cout << 
"ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; -#endif if(use_mempool){ cl_mem mem = get_mempool()->allocate(size); return mem; } + std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; diff --git a/viennacl/tools/mempool.hpp b/viennacl/tools/mempool.hpp deleted file mode 100644 index 48dbff04..00000000 --- a/viennacl/tools/mempool.hpp +++ /dev/null @@ -1,382 +0,0 @@ -// Abstract memory pool implementation -// -// Copyright (C) 2009-17 Andreas Kloeckner -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
- - -#ifndef VIENNACL_MEMPOOL_MEMPOOL_HPP_ -#define VIENNACL_MEMPOOL_MEMPOOL_HPP_ - - -#include -#include -#include -#include -#include -#include -#include "viennacl/mempool/bitlog.hpp" -#include "viennacl/mempool/mempool_utils.hpp" - -namespace viennacl -{ -namespace mempool -{ - template - inline T signed_left_shift(T x, signed shift_amount) - { - if (shift_amount < 0) - return x >> -shift_amount; - else - return x << shift_amount; - } - - - - - template - inline T signed_right_shift(T x, signed shift_amount) - { - if (shift_amount < 0) - return x << -shift_amount; - else - return x >> shift_amount; - } - - - - - template - class memory_pool : noncopyable - { - public: - typedef typename Allocator::pointer_type pointer_type; - typedef typename Allocator::size_type size_type; - - private: - typedef uint32_t bin_nr_t; - typedef std::vector bin_t; - - typedef std::map container_t; - container_t m_container; - typedef typename container_t::value_type bin_pair_t; - - std::unique_ptr m_allocator; - - // A held block is one that's been released by the application, but that - // we are keeping around to dish out again. - unsigned m_held_blocks; - - // An active block is one that is in use by the application. - unsigned m_active_blocks; - - bool m_stop_holding; - int m_trace; - - public: - memory_pool(Allocator const &alloc=Allocator()) - : m_allocator(alloc.copy()), - m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), - m_trace(false) - { - if (m_allocator->is_deferred()) - { - throw std::runtime_error("Memory pools expect non-deferred " - "semantics from their allocators. You passed a deferred " - "allocator, i.e. 
an allocator whose allocations can turn out to " - "be unavailable long after allocation."); - } - } - - virtual ~memory_pool() - { free_held(); } - - static const unsigned mantissa_bits = 2; - static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; - - static bin_nr_t bin_number(size_type size) - { - signed l = bitlog2(size); - size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); - if (size && (shifted & (1 << mantissa_bits)) == 0) - throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); - size_type chopped = shifted & mantissa_mask; - return l << mantissa_bits | chopped; - } - - void set_trace(bool flag) - { - if (flag) - ++m_trace; - else - --m_trace; - } - - static size_type alloc_size(bin_nr_t bin) - { - bin_nr_t exponent = bin >> mantissa_bits; - bin_nr_t mantissa = bin & mantissa_mask; - - size_type ones = signed_left_shift(1, - signed(exponent)-signed(mantissa_bits) - ); - if (ones) ones -= 1; - - size_type head = signed_left_shift( - (1<second; - } - else - return it->second; - } - - void inc_held_blocks() - { - if (m_held_blocks == 0) - start_holding_blocks(); - ++m_held_blocks; - } - - void dec_held_blocks() - { - --m_held_blocks; - if (m_held_blocks == 0) - stop_holding_blocks(); - } - - virtual void start_holding_blocks() - { } - - virtual void stop_holding_blocks() - { } - - public: - pointer_type allocate(size_type size) - { - bin_nr_t bin_nr = bin_number(size); - bin_t &bin = get_bin(bin_nr); - - if (bin.size()) - { - if (m_trace) - std::cout - << "[pool] allocation of size " << size << " served from bin " << bin_nr - << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); - } - - size_type alloc_sz = alloc_size(bin_nr); - - assert(bin_number(alloc_sz) == bin_nr); - - if (m_trace) - std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; - - try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) - { - if 
(!e.is_out_of_memory()) - throw; - } - - if (m_trace) - std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - m_allocator->try_release_blocks(); - if (bin.size()) - return pop_block_from_bin(bin, size); - - if (m_trace) - std::cout << "[pool] allocation still OOM after GC" << std::endl; - - while (try_to_free_memory()) - { - try { return get_from_allocator(alloc_sz); } - catch (mempool::error &e) - { - if (!e.is_out_of_memory()) - throw; - } - } - - throw error( - "memory_pool::allocate", - CL_MEM_OBJECT_ALLOCATION_FAILURE, - "failed to free memory for allocation"); - } - - void free(pointer_type p, size_type size) - { - --m_active_blocks; - bin_nr_t bin_nr = bin_number(size); - - if (!m_stop_holding) - { - inc_held_blocks(); - get_bin(bin_nr).push_back(p); - - if (m_trace) - std::cout << "[pool] block of size " << size << " returned to bin " - << bin_nr << " which now contains " << get_bin(bin_nr).size() - << " entries" << std::endl; - } - else - m_allocator->free(p); - } - - void free_held() - { - for (bin_pair_t &bin_pair: m_container) - { - bin_t &bin = bin_pair.second; - - while (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - dec_held_blocks(); - } - } - - assert(m_held_blocks == 0); - } - - void stop_holding() - { - m_stop_holding = true; - free_held(); - } - - unsigned active_blocks() - { return m_active_blocks; } - - unsigned held_blocks() - { return m_held_blocks; } - - bool try_to_free_memory() - { - // free largest stuff first - for (bin_pair_t &bin_pair: reverse(m_container)) - { - bin_t &bin = bin_pair.second; - - if (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - dec_held_blocks(); - - return true; - } - } - - return false; - } - - private: - pointer_type get_from_allocator(size_type alloc_sz) - { - pointer_type result = m_allocator->allocate(alloc_sz); - ++m_active_blocks; - - return result; - } - - pointer_type pop_block_from_bin(bin_t &bin, size_type size) - { - pointer_type 
result = bin.back(); - bin.pop_back(); - - dec_held_blocks(); - ++m_active_blocks; - - return result; - } - }; - - - template - class pooled_allocation : public noncopyable - { - public: - typedef Pool pool_type; - typedef typename Pool::pointer_type pointer_type; - typedef typename Pool::size_type size_type; - - private: - std::shared_ptr m_pool; - - pointer_type m_ptr; - size_type m_size; - bool m_valid; - - public: - pooled_allocation(std::shared_ptr p, size_type size) - : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) - { } - - ~pooled_allocation() - { - if (m_valid) - free(); - } - - void free() - { - if (m_valid) - { - m_pool->free(m_ptr, m_size); - m_valid = false; - } - else - throw mempool::error( - "pooled_device_allocation::free", - CL_INVALID_VALUE - ); - } - - pointer_type ptr() const - { return m_ptr; } - - size_type size() const - { return m_size; } - }; -} -} - -#endif -- GitLab From a1897fda9918a73e2b916fc4b0fcf5c149599e72 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 15 Nov 2018 19:24:40 -0600 Subject: [PATCH 12/46] added a file structure which makes sense --- viennacl/ocl/context.hpp | 281 +---------------------------- viennacl/ocl/mempool/mempool.hpp | 296 +++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+), 275 deletions(-) create mode 100644 viennacl/ocl/mempool/mempool.hpp diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 4cb936f6..e133a458 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -51,8 +51,8 @@ #include "viennacl/ocl/command_queue.hpp" #include "viennacl/tools/sha1.hpp" #include "viennacl/ocl/mempool/bitlog.hpp" -#include "viennacl/ocl/mempool/mempool_utils.hpp" #include "viennacl/tools/shared_ptr.hpp" +#include "viennacl/ocl/mempool/mempool.hpp" namespace viennacl @@ -96,9 +96,7 @@ namespace ocl { cl_int err = clReleaseMemObject(p); VIENNACL_ERR_CHECK(err); -#ifdef VIENNACL_DEBUG_ALL - std :: cout << "[mempool]: done with deallocation\n"; 
-#endif + std :: cout << "[allocator]: deallocation memory: " << p << std::endl; } virtual cl_allocator_base *copy() const = 0; @@ -142,277 +140,6 @@ namespace ocl // }}} - /// {{{ - - template - class memory_pool : mempool::noncopyable - { - public: - typedef cl_mem pointer_type; - typedef size_t size_type; - - private: - typedef uint32_t bin_nr_t; - typedef std::vector bin_t; - - typedef std::map container_t; - container_t m_container; - typedef typename container_t::value_type bin_pair_t; - - std::unique_ptr m_allocator; - - // A held block is one that's been released by the application, but that - // we are keeping around to dish out again. - unsigned m_held_blocks; - - // An active block is one that is in use by the application. - unsigned m_active_blocks; - - bool m_stop_holding; - int m_trace; - - public: - memory_pool(Allocator const &alloc=Allocator()) - : m_allocator(alloc.copy()), - m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), - m_trace(false) - { - if (m_allocator->is_deferred()) - { - std::cerr << "Memory pools expect non-deferred " - "semantics from their allocators. You passed a deferred " - "allocator, i.e. 
an allocator whose allocations can turn out to " - "be unavailable long after allocation.\n"; - throw std::exception(); - } - } - - virtual ~memory_pool() - { - free_held(); - } - - static const unsigned mantissa_bits = 2; - static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; - - static bin_nr_t bin_number(size_type size) - { - signed l = viennacl::mempool::bitlog2(size); - size_type shifted = viennacl::mempool::signed_right_shift(size, - l-signed(mantissa_bits)); - if (size && (shifted & (1 << mantissa_bits)) == 0) - throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); - size_type chopped = shifted & mantissa_mask; - return l << mantissa_bits | chopped; - } - - void set_trace(bool flag) - { - if (flag) - ++m_trace; - else - --m_trace; - } - - static size_type alloc_size(bin_nr_t bin) - { - bin_nr_t exponent = bin >> mantissa_bits; - bin_nr_t mantissa = bin & mantissa_mask; - - size_type ones = viennacl::mempool::signed_left_shift(1, - signed(exponent)-signed(mantissa_bits) - ); - if (ones) ones -= 1; - - size_type head = viennacl::mempool::signed_left_shift( - (1<second; - } - else - return it->second; - } - - void inc_held_blocks() - { - if (m_held_blocks == 0) - start_holding_blocks(); - ++m_held_blocks; - } - - void dec_held_blocks() - { - --m_held_blocks; - if (m_held_blocks == 0) - stop_holding_blocks(); - } - - virtual void start_holding_blocks() - { } - - virtual void stop_holding_blocks() - { } - - public: - pointer_type allocate(size_type size) - { - bin_nr_t bin_nr = bin_number(size); - bin_t &bin = get_bin(bin_nr); - - if (bin.size()) - { - if (m_trace) - std::cout - << "[pool] allocation of size " << size << " served from bin " << bin_nr - << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); - } - - size_type alloc_sz = alloc_size(bin_nr); - - assert(bin_number(alloc_sz) == bin_nr); - - if (m_trace) - std::cout << "[pool] allocation of size " << size << " required new 
memory" << std::endl; - - try { return get_from_allocator(alloc_sz); } - catch (viennacl::ocl::mem_object_allocation_failure &e) - { - throw; - } - - if (m_trace) - std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - // m_allocator->try_release_blocks(); - if (bin.size()) - return pop_block_from_bin(bin, size); - - if (m_trace) - std::cout << "[pool] allocation still OOM after GC" << std::endl; - - while (try_to_free_memory()) - { - try { return get_from_allocator(alloc_sz); } - catch (viennacl::ocl::mem_object_allocation_failure &e) - { - throw; - } - } - - std::cerr << ( - "memory_pool::allocate " - "failed to free memory for allocation\n"); - throw viennacl::ocl::mem_object_allocation_failure(); - } - - void free(pointer_type p, size_type size) - { - --m_active_blocks; - bin_nr_t bin_nr = bin_number(size); - - if (!m_stop_holding) - { - inc_held_blocks(); - get_bin(bin_nr).push_back(p); - - if (m_trace) - std::cout << "[pool] block of size " << size << " returned to bin " - << bin_nr << " which now contains " << get_bin(bin_nr).size() - << " entries" << std::endl; - } - else - m_allocator->free(p); - } - - void free_held() - { - for (bin_pair_t &bin_pair: m_container) - { - bin_t &bin = bin_pair.second; - - while (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - dec_held_blocks(); - } - } - - assert(m_held_blocks == 0); - } - - void stop_holding() - { - m_stop_holding = true; - free_held(); - } - - unsigned active_blocks() - { return m_active_blocks; } - - unsigned held_blocks() - { return m_held_blocks; } - - bool try_to_free_memory() - { - // free largest stuff first - for (bin_pair_t &bin_pair: viennacl::mempool::reverse(m_container)) - { - bin_t &bin = bin_pair.second; - - if (bin.size()) - { - m_allocator->free(bin.back()); - bin.pop_back(); - - dec_held_blocks(); - - return true; - } - } - - return false; - } - - private: - pointer_type get_from_allocator(size_type alloc_sz) - { - pointer_type result = 
m_allocator->allocate(alloc_sz); - ++m_active_blocks; - - return result; - } - - pointer_type pop_block_from_bin(bin_t &bin, size_type size) - { - pointer_type result = bin.back(); - bin.pop_back(); - - dec_held_blocks(); - ++m_active_blocks; - - return result; - } - }; - - - // - //}}}} /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. @@ -574,13 +301,17 @@ public: { if(use_mempool){ cl_mem mem = get_mempool()->allocate(size); + std::cout << "[mempool]: got a memory: " << mem << std::endl; return mem; } +#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; +#endif if (ptr && !(flags & CL_MEM_USE_HOST_PTR)) flags |= CL_MEM_COPY_HOST_PTR; cl_int err; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); + std::cout << "[viennacl]: create a buffer: " << mem << std::endl; VIENNACL_ERR_CHECK(err); return mem; } diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp new file mode 100644 index 00000000..0d3e0ccb --- /dev/null +++ b/viennacl/ocl/mempool/mempool.hpp @@ -0,0 +1,296 @@ +#ifndef VIENNACL_OCL_MEMPOOL_HPP_ +#define VIENNACL_OCL_MEMPOOL_HPP_ + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include "viennacl/ocl/mempool/bitlog.hpp" +#include "viennacl/ocl/mempool/mempool_utils.hpp" +#include "viennacl/ocl/error.hpp" + +namespace viennacl +{ +namespace ocl +{ + + template + class memory_pool : mempool::noncopyable + { + public: + typedef cl_mem pointer_type; + typedef size_t size_type; + + private: + typedef uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef std::map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + std::unique_ptr m_allocator; + + // A held block is one that's been released by the 
application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + std::cerr << "Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. an allocator whose allocations can turn out to " + "be unavailable long after allocation.\n"; + throw std::exception(); + } + } + + virtual ~memory_pool() + { + free_held(); + } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = viennacl::mempool::bitlog2(size); + size_type shifted = viennacl::mempool::signed_right_shift(size, + l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = viennacl::mempool::signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = viennacl::mempool::signed_left_shift( + (1<second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void 
stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (viennacl::ocl::mem_object_allocation_failure &e) + { + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + // m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (viennacl::ocl::mem_object_allocation_failure &e) + { + throw; + } + } + + std::cerr << ( + "memory_pool::allocate " + "failed to free memory for allocation\n"); + throw viennacl::ocl::mem_object_allocation_failure(); + } + + void free(pointer_type p, size_type size) + { + --m_active_blocks; + bin_nr_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + std::cout << "[mempool]: freeing the memory " << + p << ". 
So that it could be used again."<< std::endl; + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + for (bin_pair_t &bin_pair: m_container) + { + bin_t &bin = bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + // free largest stuff first + for (bin_pair_t &bin_pair: viennacl::mempool::reverse(m_container)) + { + bin_t &bin = bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + pointer_type get_from_allocator(size_type alloc_sz) + { + pointer_type result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; +} +} + +#endif + -- GitLab From 7be285e14cc027258ae9b600e47184e81f6f33aa Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 15 Nov 2018 19:29:38 -0600 Subject: [PATCH 13/46] adds debugging helpers --- viennacl/linalg/opencl/vector_operations.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index b7602425..6e8daa66 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -810,6 +810,7 @@ void 
norm_reduction_impl(vector_base const & vec, viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "norm"); assert( (k.global_work_size() / k.local_work_size() <= partial_result.size()) && bool("Size mismatch for partial reduction in norm_reduction_impl()") ); + std::cout << "Computing norm of " << viennacl::traits::opencl_handle(vec) << std::endl; viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec), cl_uint(viennacl::traits::start(vec)), @@ -930,6 +931,7 @@ template void norm_2_cpu(vector_base const & vec, T & result) { + std::cout << "norm_2_kernel asked for a vector.\n"; vcl_size_t work_groups = 128; viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); -- GitLab From a7949822f576917d3fbb2b64e29b076714992bbb Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 21 Nov 2018 05:44:10 -0600 Subject: [PATCH 14/46] makes the destructor virtual --- viennacl/ocl/context.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index e133a458..b2e0e754 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -134,7 +134,8 @@ namespace ocl bool is_deferred() const { return false; } - virtual ~cl_immediate_allocator() {} + virtual ~cl_immediate_allocator() + {} }; @@ -960,8 +961,7 @@ inline void viennacl::ocl::kernel::set_work_size_defaults() } } -// {{{ definitionof cl_immediate_allocator - +// {{{ definition of cl_immediate_allocator::allocate cl_mem cl_immediate_allocator::allocate(size_t s) { -- GitLab From 3f2cff3caae57004a4efda561677e1fffb31de21 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Mon, 26 Nov 2018 12:32:22 -0600 Subject: [PATCH 15/46] introduced pooled_handle as a sub-class of handle --- viennacl/backend/mem_handle.hpp | 34 +------------ viennacl/backend/memory.hpp | 10 +++- viennacl/ocl/context.hpp | 86 ++++++++++++++++++++++++++++++++- viennacl/ocl/handle.hpp | 26 +++------- 
viennacl/vector.hpp | 3 -- 5 files changed, 101 insertions(+), 58 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 54b8a6cf..1b8003de 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -94,7 +94,7 @@ public: typedef viennacl::tools::shared_ptr cuda_handle_type; /** @brief Default CTOR. No memory is allocated */ - mem_handle() : p_used_mempool(false), active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} + mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */ ram_handle_type & ram_handle() { return ram_handle_; } @@ -150,25 +150,6 @@ public: } } - - bool get_used_mempool(bool u) - { - return p_used_mempool; - } - - void set_used_mempool(bool u) - { - p_used_mempool = u; -#ifndef VIENNACL_WITH_OPENCL - std::cerr << "Memory pool allocation for non-OpenCL backends not supported yet.\n"; - throw std::exception(); -#endif -#ifdef VIENNACL_WITH_OPENCL - opencl_handle_.used_mempool(u); -#endif - } - - /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. 
*/ bool operator==(mem_handle const & other) const { @@ -253,20 +234,9 @@ public: void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; } ~mem_handle() - { - if(p_used_mempool) - { - -#ifdef VIENNACL_WITH_OPENCL - viennacl::ocl::context ctx = opencl_handle_.context(); - ctx.deallocate_memory_in_pool(opencl_handle_, raw_size()); -#endif - - } - } + {} private: - bool p_used_mempool; memory_types active_handle_; ram_handle_type ram_handle_; #ifdef VIENNACL_WITH_OPENCL diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index e1d85c4c..6b2ef70e 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -100,7 +100,15 @@ namespace backend #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: handle.opencl_handle().context(ctx.opencl_context()); - handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool); + if(use_mempool) + handle.opencl_handle() = + viennacl::ocl::pooled_clmem_handle( + opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool), + ctx.opencl_context(), + size_in_bytes); + else + handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); + handle.raw_size(size_in_bytes); break; #endif diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index b2e0e754..11ba9aea 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -140,8 +140,74 @@ namespace ocl // }}} + + // {{{ pooled handle + // + + class pooled_clmem_handle: public handle + { + protected: + typedef handle super; + + public: + pooled_clmem_handle() : super(), m_size(0), m_ref(0) {} + pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s, uint32_t _r=1) : super(something, c), m_size(_s), m_ref(_r) + {} + pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size), m_ref(other.m_ref) + { + if(h_!=0) + inc(); + } + + 
pooled_clmem_handle & operator=(const pooled_clmem_handle & other) + { + if (h_ != 0) + dec(); + h_ = other.h_; + p_context_ = other.p_context_; + m_size = other.m_size; + m_ref = other.m_ref; + inc(); + return *this; + } + + pooled_clmem_handle & operator=(const cl_mem & something) + { + std::cerr << "[pooled_handle]: Pooled handle needs to know about size\n"; + throw std::exception(); + return *this; + } + + /** @brief Swaps the OpenCL handle of two handle objects */ + pooled_clmem_handle & swap(pooled_clmem_handle & other) + { + cl_mem tmp = other.h_; + other.h_ = this->h_; + this->h_ = tmp; + viennacl::ocl::context const * tmp2 = other.p_context_; + other.p_context_ = this->p_context_; + this->p_context_ = tmp2; + size_t tmp3 = other.m_size; + other.m_size = this->m_size; + this->m_size = tmp3; + + uint32_t tmp4 = other.m_ref; + other.m_ref = this->m_ref; + this->m_ref = tmp4; + + return *this; + } + + void inc() { m_ref += 1;} + void dec(); + private: + size_t m_size; + uint32_t m_ref; + }; + + // }}} /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. * @@ -319,9 +385,9 @@ public: /// [KK]: TODOTODOTODOTODO - void deallocate_memory_in_pool(viennacl::ocl::handle& mem_handle, size_t size) + void deallocate_memory_in_pool(cl_mem p, size_t size) const { - get_mempool()->free(mem_handle.get(), size); + get_mempool()->free(p, size); } @@ -993,6 +1059,22 @@ cl_mem cl_immediate_allocator::allocate(size_t s) // }}} +// {{{ pooled handle dec + +void pooled_clmem_handle::dec() +{ + if(m_ref == 0) { + std::cerr << "[pooled_handle]: Destroying an already destroyed memory object." 
<< std::endl; + throw std::exception(); + } + m_ref-=1; + if(m_ref == 0) { + p_context_->deallocate_memory_in_pool(h_, m_size); + } +} + +// }}} + } } diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 631f58d3..fac102e8 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -155,16 +155,10 @@ namespace viennacl class handle { public: - handle() : used_mempool_(false), h_(0), p_context_(NULL) {} - handle(const OCL_TYPE & something, viennacl::ocl::context const & c, bool u = false) : used_mempool_(u), h_(something), p_context_(&c) - { - if((typeid(OCL_TYPE) != typeid(cl_mem)) && used_mempool_) - { - std::cerr << "[handle]: memory pool is only available for memory objects." << std::endl; - throw std::exception(); - } - } - handle(const handle & other) : used_mempool_(other.used_mempool_), h_(other.h_), p_context_(other.p_context_) { + handle() : h_(0), p_context_(NULL) {} + handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) + {} + handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } ~handle() { if (h_ != 0) dec(); } @@ -208,7 +202,6 @@ namespace viennacl return *p_context_; } void context(viennacl::ocl::context const & c) { p_context_ = &c; } - void used_mempool(bool u) { used_mempool_ = u; } /** @brief Swaps the OpenCL handle of two handle objects */ @@ -228,16 +221,9 @@ namespace viennacl void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. */ void dec() { - if(!used_mempool_) - { - // only handling the freeing of memory through this class if there - // is no mempool, otherwise for now handling it through the class - // vector base. or whatever equivalent. 
- handle_inc_dec_helper::dec(h_); - } + handle_inc_dec_helper::dec(h_); } - private: - bool used_mempool_; + protected: OCL_TYPE h_; viennacl::ocl::context const * p_context_; }; diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 9e01222e..c0fa4de8 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -261,9 +261,6 @@ vector_base::vector_base(size_type vec_size, viennac { if (size_ > 0) { -#ifdef VIENNACL_WITH_OPENCL - elements_.set_used_mempool(use_mempool); -#endif viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL, use_mempool); clear(); } -- GitLab From b1218dbc472752a43427bfa868acb41241c772b8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 7 Dec 2018 10:32:02 -0600 Subject: [PATCH 16/46] still some issues with destructing --- viennacl/backend/memory.hpp | 2 ++ viennacl/ocl/context.hpp | 31 +++++++++++++++++++++++++------ viennacl/ocl/handle.hpp | 9 +++------ viennacl/ocl/mempool/mempool.hpp | 5 +++-- 4 files changed, 33 insertions(+), 14 deletions(-) diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 6b2ef70e..681385b7 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -101,11 +101,13 @@ namespace backend case OPENCL_MEMORY: handle.opencl_handle().context(ctx.opencl_context()); if(use_mempool) + { handle.opencl_handle() = viennacl::ocl::pooled_clmem_handle( opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool), ctx.opencl_context(), size_in_bytes); + } else handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 11ba9aea..58f6a17d 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -152,7 +152,13 @@ namespace ocl public: pooled_clmem_handle() : super(), m_size(0), m_ref(0) {} pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t 
& _s, uint32_t _r=1) : super(something, c), m_size(_s), m_ref(_r) - {} + {if(h_!=0) + { + inc(); + cl_int err = clRetainMemObject(something); + VIENNACL_ERR_CHECK(err); + } + } pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size), m_ref(other.m_ref) { if(h_!=0) @@ -200,8 +206,18 @@ namespace ocl return *this; } - void inc() { m_ref += 1;} - void dec(); + void inc() + { + cl_int err = clRetainMemObject(h_); + VIENNACL_ERR_CHECK(err); + std::cout << "[pooled_handle]: Incrementing counter." << std::endl; + ++m_ref; + } + inline virtual void dec(); + virtual ~pooled_clmem_handle() { + if (h_!=0) dec(); + } + private: size_t m_size; uint32_t m_ref; @@ -366,9 +382,11 @@ public: */ cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { + if(use_mempool){ + std::cout << "[mempool]: querying for memory\n"; cl_mem mem = get_mempool()->allocate(size); - std::cout << "[mempool]: got a memory: " << mem << std::endl; + std::cout << "[mempool]: gave memory at: " << mem << std::endl; return mem; } #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) @@ -378,7 +396,7 @@ public: flags |= CL_MEM_COPY_HOST_PTR; cl_int err; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); - std::cout << "[viennacl]: create a buffer: " << mem << std::endl; + std::cout << "[viennacl]: created a buffer: " << mem << std::endl; VIENNACL_ERR_CHECK(err); return mem; } @@ -1063,11 +1081,12 @@ cl_mem cl_immediate_allocator::allocate(size_t s) void pooled_clmem_handle::dec() { + std::cout << "[pooled_handle]: Decrementing ref counter of value " << m_ref << std::endl; if(m_ref == 0) { std::cerr << "[pooled_handle]: Destroying an already destroyed memory object." 
<< std::endl; throw std::exception(); } - m_ref-=1; + --m_ref; if(m_ref == 0) { p_context_->deallocate_memory_in_pool(h_, m_size); } diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index fac102e8..8bc678c9 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -61,9 +61,6 @@ namespace viennacl static void dec(cl_mem & something) { -#ifdef VIENNACL_DEBUG_ALL - std :: cout << "[viennacl]: Deallocating from handle...\n"; -#endif cl_int err = clReleaseMemObject(something); VIENNACL_ERR_CHECK(err); } @@ -160,7 +157,7 @@ namespace viennacl {} handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } - ~handle() { if (h_ != 0) dec(); } + virtual ~handle() { if (h_ != 0) dec(); } /** @brief Copies the OpenCL handle from the provided handle. Does not take ownership like e.g. std::auto_ptr<>, so both handle objects are valid (more like shared_ptr). */ handle & operator=(const handle & other) @@ -218,9 +215,9 @@ namespace viennacl } /** @brief Manually increment the OpenCL reference count. Typically called automatically, but is necessary if user-supplied memory objects are wrapped. */ - void inc() { handle_inc_dec_helper::inc(h_); } + virtual void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. */ - void dec() { + virtual void dec() { handle_inc_dec_helper::dec(h_); } protected: diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 0d3e0ccb..8a82a930 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -199,13 +199,14 @@ namespace ocl void free(pointer_type p, size_type size) { + + std::cout << "[mempool]: freeing the memory " << + p << ". 
So that it could be used again."<< std::endl; --m_active_blocks; bin_nr_t bin_nr = bin_number(size); if (!m_stop_holding) { - std::cout << "[mempool]: freeing the memory " << - p << ". So that it could be used again."<< std::endl; inc_held_blocks(); get_bin(bin_nr).push_back(p); -- GitLab From ebcc1c20a6e77ab54b5c0324fc9b1399c2180401 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 7 Dec 2018 18:45:23 -0600 Subject: [PATCH 17/46] minor comments --- viennacl/backend/memory.hpp | 1 + viennacl/ocl/handle.hpp | 1 - viennacl/vector.hpp | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 681385b7..5b238e32 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -102,6 +102,7 @@ namespace backend handle.opencl_handle().context(ctx.opencl_context()); if(use_mempool) { + // If using memory pool then use a pooled handle handle.opencl_handle() = viennacl::ocl::pooled_clmem_handle( opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool), diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 8bc678c9..8b0f1a10 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -187,7 +187,6 @@ namespace viennacl return *this; } - /** @brief Implicit conversion to the plain OpenCL handle. DEPRECATED and will be removed some time in the future. 
*/ operator OCL_TYPE() const { return h_; } diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index c0fa4de8..df3581d9 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -261,6 +261,7 @@ vector_base::vector_base(size_type vec_size, viennac { if (size_ > 0) { + // [kk:] this is the constructor that we are concerned about viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL, use_mempool); clear(); } -- GitLab From 66c53cc821661da12952db662e57b32a9b6a999a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 15 Dec 2018 11:09:09 -0600 Subject: [PATCH 18/46] First phase of making the mem_handle a templated class --- viennacl/backend/mem_handle.hpp | 7 +-- viennacl/backend/memory.hpp | 27 ++++++---- viennacl/backend/util.hpp | 18 ++++--- viennacl/detail/vector_def.hpp | 4 +- viennacl/forwards.h | 19 ++++--- viennacl/scalar.hpp | 4 +- viennacl/tools/entry_proxy.hpp | 14 ++--- viennacl/traits/context.hpp | 3 +- viennacl/traits/handle.hpp | 94 +++++++++++++++++---------------- 9 files changed, 106 insertions(+), 84 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 1b8003de..29a09d8c 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -87,6 +87,7 @@ inline memory_types default_memory_type(memory_types new_memory_type) { return d * Instead, this class collects all the necessary conditional compilations. * */ +template class mem_handle { public: @@ -103,9 +104,9 @@ public: #ifdef VIENNACL_WITH_OPENCL /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ - viennacl::ocl::handle & opencl_handle() { return opencl_handle_; } + OCL_Handle & opencl_handle() { return opencl_handle_; } /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. 
*/ - viennacl::ocl::handle const & opencl_handle() const { return opencl_handle_; } + OCL_Handle const & opencl_handle() const { return opencl_handle_; } #endif #ifdef VIENNACL_WITH_CUDA @@ -240,7 +241,7 @@ private: memory_types active_handle_; ram_handle_type ram_handle_; #ifdef VIENNACL_WITH_OPENCL - viennacl::ocl::handle opencl_handle_; + OCL_Handle opencl_handle_; #endif #ifdef VIENNACL_WITH_CUDA cuda_handle_type cuda_handle_; diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 5b238e32..e6311434 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -84,7 +84,8 @@ namespace backend * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. * */ - inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL, bool use_mempool = false) + template > + inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL, bool use_mempool = false) { if (size_in_bytes > 0) { @@ -148,8 +149,9 @@ namespace backend * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) * @param bytes_to_copy Number of bytes to be copied */ - inline void memory_copy(mem_handle const & src_buffer, - mem_handle & dst_buffer, + template > + inline void memory_copy(mem_handle const & src_buffer, + mem_handle & dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy) @@ -185,8 +187,9 @@ namespace backend /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer. * The uninitialized buffer just copies the raw handle. 
*/ - inline void memory_shallow_copy(mem_handle const & src_buffer, - mem_handle & dst_buffer) + template > + inline void memory_shallow_copy(mem_handle const & src_buffer, + mem_handle & dst_buffer) { assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!")); @@ -228,7 +231,8 @@ namespace backend * @param ptr Pointer to the first byte to be written * @param async Whether the operation should be asynchronous */ - inline void memory_write(mem_handle & dst_buffer, + template > + inline void memory_write(mem_handle & dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_write, const void * ptr, @@ -269,7 +273,8 @@ namespace backend * @param ptr Location in main RAM where to read data should be written to * @param async Whether the operation should be asynchronous */ - inline void memory_read(mem_handle const & src_buffer, + template > + inline void memory_read(mem_handle const & src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void * ptr, @@ -375,8 +380,8 @@ namespace backend /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */ - template - void switch_memory_context(mem_handle & handle, viennacl::context new_ctx) + template> + void switch_memory_context(mem_handle & handle, viennacl::context new_ctx) { if (handle.get_active_handle_id() == new_ctx.memory_type()) return; @@ -477,8 +482,8 @@ namespace backend /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. 
*/ - template - void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst) + template> + void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst) { if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED) handle_dst.switch_active_handle_id(default_memory_type()); diff --git a/viennacl/backend/util.hpp b/viennacl/backend/util.hpp index 9aaeb2e9..b484bdea 100644 --- a/viennacl/backend/util.hpp +++ b/viennacl/backend/util.hpp @@ -97,7 +97,8 @@ class typesafe_host_array public: explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {} - explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) + template > + explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) { resize(handle, num); } @@ -122,7 +123,8 @@ public: // /** @brief Resize without initializing the new memory */ - void raw_resize(mem_handle const & /*handle*/, vcl_size_t num) + template > + void raw_resize(mem_handle const & /*handle*/, vcl_size_t num) { buffer_size_ = sizeof(cpu_type) * num; @@ -135,7 +137,8 @@ public: } /** @brief Resize including initialization of new memory (cf. std::vector<>) */ - void resize(mem_handle const & handle, vcl_size_t num) + template > + void resize(mem_handle const & handle, vcl_size_t num) { raw_resize(handle, num); @@ -171,7 +174,8 @@ class typesafe_host_array public: explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? 
true : false), bytes_buffer_(NULL), buffer_size_(0) {} - explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) + template > + explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) { resize(handle, num); } @@ -220,7 +224,8 @@ public: // /** @brief Resize without initializing the new memory */ - void raw_resize(mem_handle const & handle, vcl_size_t num) + template > + void raw_resize(mem_handle const & handle, vcl_size_t num) { buffer_size_ = sizeof(cpu_type) * num; (void)handle; //silence unused variable warning if compiled without OpenCL support @@ -246,7 +251,8 @@ public: } /** @brief Resize including initialization of new memory (cf. std::vector<>) */ - void resize(mem_handle const & handle, vcl_size_t num) + template > + void resize(mem_handle const & handle, vcl_size_t num) { raw_resize(handle, num); diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 4cc98d46..b42c05ec 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -100,7 +100,7 @@ struct zero_vector : public scalar_vector * * @tparam NumericT The floating point type, either 'float' or 'double' */ -template +template class vector_base { typedef vector_base self_type; @@ -108,7 +108,7 @@ class vector_base public: typedef scalar value_type; typedef NumericT cpu_value_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; typedef SizeT size_type; typedef DistanceT difference_type; typedef const_vector_iterator const_iterator; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 092b6e51..f550ae69 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -69,6 +69,8 @@ #include "viennacl/meta/enable_if.hpp" #include "viennacl/version.hpp" +#include "CL/cl.h" + /** 
@brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. */ namespace viennacl { @@ -246,17 +248,23 @@ namespace viennacl /** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */ struct op_flip_sign {}; + /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ + namespace ocl { + template + class handle; + } + //forward declaration of basic types: - template + template> class scalar; template class scalar_expression; - template + template> class entry_proxy; - template + template> class const_entry_proxy; template @@ -283,7 +291,7 @@ namespace viennacl template struct scalar_vector; - template + template> class vector_base; template @@ -376,6 +384,7 @@ namespace viennacl namespace backend { + template > class mem_handle; } @@ -1023,8 +1032,6 @@ namespace viennacl } } - /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ - namespace ocl {} /** @brief Namespace containing many meta-functions. */ namespace result_of {} diff --git a/viennacl/scalar.hpp b/viennacl/scalar.hpp index 4eaf7522..5fdc5b00 100644 --- a/viennacl/scalar.hpp +++ b/viennacl/scalar.hpp @@ -343,12 +343,12 @@ private: * * @tparam NumericT Either float or double. Checked at compile time. */ -template +template class scalar { typedef scalar self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; typedef vcl_size_t size_type; /** @brief Returns the underlying host scalar type. 
*/ diff --git a/viennacl/tools/entry_proxy.hpp b/viennacl/tools/entry_proxy.hpp index 64114eb2..7b055794 100644 --- a/viennacl/tools/entry_proxy.hpp +++ b/viennacl/tools/entry_proxy.hpp @@ -37,11 +37,11 @@ namespace viennacl * * @tparam NumericT Either float or double */ -template +template class entry_proxy { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. * @@ -159,7 +159,7 @@ private: } vcl_size_t index_; - viennacl::backend::mem_handle & mem_handle_; + viennacl::backend::mem_handle & mem_handle_; }; //entry_proxy @@ -175,12 +175,12 @@ private: * * @tparam NumericT Either float or double */ -template +template class const_entry_proxy { - typedef const_entry_proxy self_type; + typedef const_entry_proxy self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. 
* @@ -226,7 +226,7 @@ private: } vcl_size_t index_; - viennacl::backend::mem_handle const & mem_handle_; + handle_type const & mem_handle_; }; //entry_proxy } diff --git a/viennacl/traits/context.hpp b/viennacl/traits/context.hpp index c84ab99c..0613ee37 100644 --- a/viennacl/traits/context.hpp +++ b/viennacl/traits/context.hpp @@ -48,7 +48,8 @@ viennacl::context context(T const & t) } /** @brief Returns an ID for the currently active memory domain of an object */ -inline viennacl::context context(viennacl::backend::mem_handle const & h) +template > +inline viennacl::context context(viennacl::backend::mem_handle const & h) { #ifdef VIENNACL_WITH_OPENCL if (h.get_active_handle_id() == OPENCL_MEMORY) diff --git a/viennacl/traits/handle.hpp b/viennacl/traits/handle.hpp index 7a2af9a8..cef56d61 100644 --- a/viennacl/traits/handle.hpp +++ b/viennacl/traits/handle.hpp @@ -37,15 +37,15 @@ namespace traits // Generic memory handle // /** @brief Returns the generic memory handle of an object. Non-const version. */ -template -viennacl::backend::mem_handle & handle(T & obj) +template> +viennacl::backend::mem_handle & handle(T & obj) { return obj.handle(); } /** @brief Returns the generic memory handle of an object. Const-version. 
*/ -template -viennacl::backend::mem_handle const & handle(T const & obj) +template> +viennacl::backend::mem_handle const & handle(T const & obj) { return obj.handle(); } @@ -58,96 +58,96 @@ inline long handle(long val) { return val; } //for unification purposes whe inline float handle(float val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline double handle(double val) { return val; } //for unification purposes when passing CPU-scalars to kernels -template -viennacl::backend::mem_handle & handle(viennacl::scalar_expression< const LHS, const RHS, OP> & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::scalar_expression< const LHS, const RHS, OP> & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj); +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj); -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj); +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj); -template -viennacl::backend::mem_handle const & handle(viennacl::scalar_expression< const LHS, const RHS, OP> const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::scalar_expression< const LHS, const RHS, OP> const & obj) { return handle(obj.lhs()); } // proxy objects require extra care (at the moment) -template -viennacl::backend::mem_handle & handle(viennacl::vector_base & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::vector_base & obj) { return obj.handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_base const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_base const & obj) { return obj.handle(); } -template -viennacl::backend::mem_handle & handle(viennacl::matrix_range & obj) +template> +viennacl::backend::mem_handle & 
handle(viennacl::matrix_range & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_range const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_range const & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle & handle(viennacl::matrix_slice & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::matrix_slice & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_slice const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_slice const & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression 
const & obj) { return handle(obj.rhs()); } @@ -158,26 +158,28 @@ viennacl::backend::mem_handle const & handle(viennacl::matrix_expression -typename viennacl::backend::mem_handle::ram_handle_type & ram_handle(T & obj) +template> +typename viennacl::backend::mem_handle::ram_handle_type & ram_handle(T & obj) { return viennacl::traits::handle(obj).ram_handle(); } /** @brief Generic helper routine for extracting the RAM handle of a ViennaCL object. Const version. */ -template -typename viennacl::backend::mem_handle::ram_handle_type const & ram_handle(T const & obj) +template> +typename viennacl::backend::mem_handle::ram_handle_type const & ram_handle(T const & obj) { return viennacl::traits::handle(obj).ram_handle(); } /** \cond */ -inline viennacl::backend::mem_handle::ram_handle_type & ram_handle(viennacl::backend::mem_handle & h) +template +inline viennacl::tools::shared_ptr & ram_handle(viennacl::backend::mem_handle & h) { return h.ram_handle(); } -inline viennacl::backend::mem_handle::ram_handle_type const & ram_handle(viennacl::backend::mem_handle const & h) +template > +inline viennacl::tools::shared_ptr const & ram_handle(viennacl::backend::mem_handle const & h) { return h.ram_handle(); } @@ -188,15 +190,15 @@ inline viennacl::backend::mem_handle::ram_handle_type const & ram_handle(viennac // #ifdef VIENNACL_WITH_OPENCL /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Non-const version. */ -template -viennacl::ocl::handle & opencl_handle(T & obj) +template> +H & opencl_handle(T & obj) { return viennacl::traits::handle(obj).opencl_handle(); } /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Const version. 
*/ -template -viennacl::ocl::handle const & opencl_handle(T const & obj) +template> +H const & opencl_handle(T const & obj) { return viennacl::traits::handle(obj).opencl_handle(); } -- GitLab From c31ad5cf0078ab0d4243ef1dd83a7b9ad2e4c2e1 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 15 Dec 2018 14:15:35 -0600 Subject: [PATCH 19/46] some more progress on making mem_handle a templated class --- viennacl/detail/vector_def.hpp | 4 ++-- viennacl/device_specific/forwards.h | 16 ++++++++++------ viennacl/linalg/opencl/vector_operations.hpp | 8 ++++---- viennacl/vector.hpp | 4 ++-- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index b42c05ec..d611dde7 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -141,7 +141,7 @@ public: * @param vec_start The offset from the beginning of the buffer identified by 'h' * @param vec_stride Increment between two elements in the original buffer (in multiples of NumericT) */ - explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); + explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); /** @brief Creates a vector and allocates the necessary memory */ explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context(), @@ -303,7 +303,7 @@ public: protected: - void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } + void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } /** @brief Swaps the handles of two vectors by swapping the OpenCL handles only, no data copy */ self_type & fast_swap(self_type & other); diff --git a/viennacl/device_specific/forwards.h b/viennacl/device_specific/forwards.h index 11368bb4..9c635e0c 100644 --- a/viennacl/device_specific/forwards.h +++ b/viennacl/device_specific/forwards.h @@ -217,16 +217,18 @@ 
class symbolic_binder { public: virtual ~symbolic_binder(){ } - virtual bool bind(viennacl::backend::mem_handle const * ph) = 0; - virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0; + virtual bool bind(viennacl::backend::mem_handle<> const * ph) = 0; + virtual unsigned int get(viennacl::backend::mem_handle<> const * ph) = 0; }; class bind_to_handle : public symbolic_binder { public: bind_to_handle() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } - unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph) ? current_arg_++ : at(memory, (void*)ph); } + + bool bind(viennacl::backend::mem_handle<> const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } + + unsigned int get(viennacl::backend::mem_handle<> const * ph){ return bind(ph) ? current_arg_++ : at(memory, (void*)ph); } private: unsigned int current_arg_; std::map memory; @@ -236,8 +238,10 @@ class bind_all_unique : public symbolic_binder { public: bind_all_unique() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const *) {return true; } - unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } + bool bind(viennacl::backend::mem_handle const *) {return true; } + bool bind(viennacl::backend::mem_handle<> const *) {return true; } + unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } + unsigned int get(viennacl::backend::mem_handle<> const *){ return current_arg_++; } private: unsigned int current_arg_; std::map memory; diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 6e8daa66..03ab6099 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -1287,15 +1287,15 @@ namespace detail * Note on performance: For non-in-place scans one could 
optimize away the temporary 'opencl_carries'-array. * This, however, only provides small savings in the latency-dominated regime, yet would effectively double the amount of code to maintain. */ - template - void scan_impl(vector_base const & input, - vector_base & output, + template> + void scan_impl(vector_base const & input, + vector_base & output, bool is_inclusive) { vcl_size_t local_worksize = 128; vcl_size_t workgroups = 128; - viennacl::backend::mem_handle opencl_carries; + viennacl::backend::mem_handle opencl_carries; viennacl::backend::memory_create(opencl_carries, sizeof(NumericT)*workgroups, viennacl::traits::context(input)); viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context()); diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index df3581d9..2f5a53f8 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -104,7 +104,7 @@ private: * @tparam NumericT The underlying floating point type (either float or double) * @tparam AlignmentV Alignment of the underlying vector, @see vector */ -template +template> class const_vector_iterator { typedef const_vector_iterator self_type; @@ -112,7 +112,7 @@ public: typedef scalar value_type; typedef vcl_size_t size_type; typedef vcl_ptrdiff_t difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; //const_vector_iterator() {} -- GitLab From 87c8500fc003af7c3f61b3a4d7eec19f71ef2bf8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 15 Dec 2018 17:29:48 -0600 Subject: [PATCH 20/46] added the OCLHandle as another template parameter --- viennacl/backend/memory.hpp | 46 +- viennacl/backend/opencl.hpp | 19 +- viennacl/detail/vector_def.hpp | 13 +- viennacl/forwards.h | 4 +- viennacl/linalg/opencl/vector_operations.hpp | 35 +- viennacl/vector.hpp | 812 +++++++++---------- 6 files changed, 477 insertions(+), 452 deletions(-) diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 
e6311434..318c8efd 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -84,8 +84,7 @@ namespace backend * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. * */ - template > - inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL, bool use_mempool = false) + inline void memory_create(mem_handle<> & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) { if (size_in_bytes > 0) { @@ -101,17 +100,7 @@ namespace backend #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: handle.opencl_handle().context(ctx.opencl_context()); - if(use_mempool) - { - // If using memory pool then use a pooled handle - handle.opencl_handle() = - viennacl::ocl::pooled_clmem_handle( - opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr, use_mempool), - ctx.opencl_context(), - size_in_bytes); - } - else - handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); + handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); handle.raw_size(size_in_bytes); break; @@ -130,6 +119,37 @@ namespace backend } } + // Pooled version of the above function! 
+ inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) + { + if (size_in_bytes > 0) + { + if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED) + handle.switch_active_handle_id(ctx.memory_type()); + + switch (handle.get_active_handle_id()) + { +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + handle.opencl_handle().context(ctx.opencl_context()); + // If using memory pool then use a pooled handle + handle.opencl_handle() = + viennacl::ocl::pooled_clmem_handle( + opencl::pooled_memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr), + ctx.opencl_context(), + size_in_bytes); + + handle.raw_size(size_in_bytes); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("Pooled handle only available with OpenCL memory for now!"); + } + } + } + /* inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL) { diff --git a/viennacl/backend/opencl.hpp b/viennacl/backend/opencl.hpp index 66b5c22a..10897fc1 100644 --- a/viennacl/backend/opencl.hpp +++ b/viennacl/backend/opencl.hpp @@ -52,12 +52,27 @@ namespace opencl * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) * */ -inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL, bool use_mempool = false) +inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL) { //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl; - return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr), use_mempool); + return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), 
const_cast(host_ptr)); } + +/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer. + * + * @param size_in_bytes Number of bytes to allocate + * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. + * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) + * + */ +inline cl_mem pooled_memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL) +{ + //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl; + return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr), true); +} + + /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context. 
* * @param src_buffer A smart pointer to the begin of an allocated OpenCL buffer diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index d611dde7..4118975d 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -100,15 +100,15 @@ struct zero_vector : public scalar_vector * * @tparam NumericT The floating point type, either 'float' or 'double' */ -template +template class vector_base { - typedef vector_base self_type; + typedef vector_base self_type; public: typedef scalar value_type; typedef NumericT cpu_value_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; typedef SizeT size_type; typedef DistanceT difference_type; typedef const_vector_iterator const_iterator; @@ -141,11 +141,10 @@ public: * @param vec_start The offset from the beginning of the buffer identified by 'h' * @param vec_stride Increment between two elements in the original buffer (in multiples of NumericT) */ - explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); + explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); /** @brief Creates a vector and allocates the necessary memory */ - explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context(), - bool use_mempool = false); + explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context()); // CUDA or host memory: explicit vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start = 0, size_type stride = 1); @@ -303,7 +302,7 @@ public: protected: - void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } + void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } /** @brief Swaps the handles of two vectors by swapping the OpenCL handles only, no data copy */ self_type & 
fast_swap(self_type & other); diff --git a/viennacl/forwards.h b/viennacl/forwards.h index f550ae69..3abad300 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -273,7 +273,7 @@ namespace viennacl template class vector_iterator; - template + template> class const_vector_iterator; template @@ -294,7 +294,7 @@ namespace viennacl template> class vector_base; - template + template> class vector; template diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 03ab6099..5d016eff 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -42,15 +42,6 @@ #include "viennacl/traits/handle.hpp" #include "viennacl/traits/stride.hpp" -#ifdef VIENNACL_WITH_OPENCL -#define USE_MEMPOOL true -#endif - -#ifndef VIENNACL_WITH_OPENCL -#define USE_MEMPOOL false -#endif - - namespace viennacl { namespace linalg @@ -540,7 +531,7 @@ void inner_prod_impl(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -601,7 +592,7 @@ void inner_prod_impl(vector_base const & x, viennacl::ocl::kernel & inner_prod_kernel_8 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod::program_name(), "inner_prod8"); vcl_size_t work_groups = inner_prod_kernel_8.global_work_size(0) / inner_prod_kernel_8.local_work_size(0); - viennacl::vector temp(8 * work_groups, viennacl::traits::context(x), USE_MEMPOOL); + viennacl::vector temp(8 * work_groups, viennacl::traits::context(x)); vcl_size_t current_index = 0; while (current_index < vec_tuple.const_size()) @@ -771,7 +762,7 @@ void 
inner_prod_cpu(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -839,7 +830,7 @@ void norm_1_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -868,7 +859,7 @@ void norm_1_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -903,7 +894,7 @@ void norm_2_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -933,7 +924,7 @@ void norm_2_cpu(vector_base const & vec, { std::cout << "norm_2_kernel asked for a vector.\n"; vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group 
results norm_reduction_impl(vec, temp, 2); @@ -968,7 +959,7 @@ void norm_inf_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -997,7 +988,7 @@ void norm_inf_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -1070,7 +1061,7 @@ void max_impl(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(x)); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "max_kernel"); @@ -1106,7 +1097,7 @@ void max_cpu(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(x)); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "max_kernel"); @@ -1149,7 +1140,7 @@ void min_impl(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(x)); viennacl::ocl::kernel & k = 
ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "min_kernel"); @@ -1185,7 +1176,7 @@ void min_cpu(vector_base const & x, viennacl::linalg::opencl::kernels::vector::init(ctx); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(x), USE_MEMPOOL); + viennacl::vector temp(work_groups, viennacl::traits::context(x)); viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "min_kernel"); diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 2f5a53f8..074faec1 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -104,7 +104,7 @@ private: * @tparam NumericT The underlying floating point type (either float or double) * @tparam AlignmentV Alignment of the underlying vector, @see vector */ -template> +template class const_vector_iterator { typedef const_vector_iterator self_type; @@ -112,7 +112,7 @@ public: typedef scalar value_type; typedef vcl_size_t size_type; typedef vcl_ptrdiff_t difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; //const_vector_iterator() {} @@ -247,138 +247,138 @@ private: }; -template -vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a vector might have been created globally before the backend has become available */ } +template +vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a vector might have been created globally before the backend has become available */ } -template -vector_base::vector_base(viennacl::backend::mem_handle & h, - size_type vec_size, size_type vec_start, size_type vec_stride) - : size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} +template +vector_base::vector_base(viennacl::backend::mem_handle & h, + size_type vec_size, 
size_type vec_start, size_type vec_stride) +: size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} -template -vector_base::vector_base(size_type vec_size, viennacl::context ctx, bool use_mempool) - : size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) +template +vector_base::vector_base(size_type vec_size, viennacl::context ctx) +: size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { - if (size_ > 0) - { - // [kk:] this is the constructor that we are concerned about - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL, use_mempool); - clear(); - } +if (size_ > 0) +{ + // [kk:] this is the constructor that we are concerned about + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL); + clear(); +} } // CUDA or host memory: -template -vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) - : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +template +vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) +: size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +{ +if (mem_type == viennacl::CUDA_MEMORY) { - if (mem_type == viennacl::CUDA_MEMORY) - { #ifdef VIENNACL_WITH_CUDA - elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); - elements_.cuda_handle().reset(reinterpret_cast(ptr_to_mem)); - elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 
+ elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); + elements_.cuda_handle().reset(reinterpret_cast(ptr_to_mem)); + elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. #else - throw cuda_not_available_exception(); + throw cuda_not_available_exception(); #endif - } - else if (mem_type == viennacl::MAIN_MEMORY) - { - elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); - elements_.ram_handle().reset(reinterpret_cast(ptr_to_mem)); - elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. - } +} +else if (mem_type == viennacl::MAIN_MEMORY) +{ + elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); + elements_.ram_handle().reset(reinterpret_cast(ptr_to_mem)); + elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. +} - elements_.raw_size(sizeof(NumericT) * vec_size); +elements_.raw_size(sizeof(NumericT) * vec_size); } #ifdef VIENNACL_WITH_OPENCL -template -vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type stride, viennacl::context ctx) - : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +template +vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type stride, viennacl::context ctx) +: size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) { - elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - elements_.opencl_handle() = existing_mem; - elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 
- elements_.opencl_handle().context(ctx.opencl_context()); - elements_.raw_size(sizeof(NumericT) * vec_size); +elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); +elements_.opencl_handle() = existing_mem; +elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. +elements_.opencl_handle().context(ctx.opencl_context()); +elements_.raw_size(sizeof(NumericT) * vec_size); } #endif -template +template template -vector_base::vector_base(vector_expression const & proxy) - : size_(viennacl::traits::size(proxy)), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) +vector_base::vector_base(vector_expression const & proxy) +: size_(viennacl::traits::size(proxy)), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); - clear(); - } - self_type::operator=(proxy); +if (size_ > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); + clear(); +} +self_type::operator=(proxy); } // Copy CTOR: -template -vector_base::vector_base(const vector_base & other) : - size_(other.size_), start_(0), stride_(1), - internal_size_(viennacl::tools::align_to_multiple(other.size_, dense_padding_size)) +template +vector_base::vector_base(const vector_base & other) : +size_(other.size_), start_(0), stride_(1), +internal_size_(viennacl::tools::align_to_multiple(other.size_, dense_padding_size)) { - elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); - if (internal_size() > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); - clear(); - self_type::operator=(other); - } 
+elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); +if (internal_size() > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); + clear(); + self_type::operator=(other); +} } // Conversion CTOR: -template +template template -vector_base::vector_base(const vector_base & other) : - size_(other.size()), start_(0), stride_(1), - internal_size_(viennacl::tools::align_to_multiple(other.size(), dense_padding_size)) +vector_base::vector_base(const vector_base & other) : +size_(other.size()), start_(0), stride_(1), +internal_size_(viennacl::tools::align_to_multiple(other.size(), dense_padding_size)) { - elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); - if (internal_size() > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); - clear(); - self_type::operator=(other); - } +elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); +if (internal_size() > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); + clear(); + self_type::operator=(other); +} } -template -vector_base & vector_base::operator=(const self_type & vec) +template +vector_base & vector_base::operator=(const self_type & vec) { - assert( ( (vec.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (vec.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (&vec==this) - return *this; +if (&vec==this) + return *this; - if (vec.size() > 0) +if (vec.size() > 0) +{ + if (size_ == 0) { - if (size_ == 0) - { - size_ = vec.size(); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - elements_.switch_active_handle_id(vec.handle().get_active_handle_id()); - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), 
viennacl::traits::context(vec)); - pad(); - } - - viennacl::linalg::av(*this, - vec, cpu_value_type(1.0), 1, false, false); + size_ = vec.size(); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + elements_.switch_active_handle_id(vec.handle().get_active_handle_id()); + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(vec)); + pad(); } - return *this; + viennacl::linalg::av(*this, + vec, cpu_value_type(1.0), 1, false, false); +} + +return *this; } @@ -386,122 +386,122 @@ vector_base & vector_base +template template -vector_base & vector_base::operator=(const vector_expression & proxy) +vector_base & vector_base::operator=(const vector_expression & proxy) { - assert( ( (viennacl::traits::size(proxy) == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (viennacl::traits::size(proxy) == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - // initialize the necessary buffer - if (size() == 0) - { - size_ = viennacl::traits::size(proxy); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); - pad(); - } +// initialize the necessary buffer +if (size() == 0) +{ + size_ = viennacl::traits::size(proxy); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); + pad(); +} - linalg::detail::op_executor >::apply(*this, proxy); +linalg::detail::op_executor >::apply(*this, proxy); - return *this; +return *this; } // convert from vector with other numeric type -template +template template -vector_base & vector_base:: operator = (const vector_base & v1) +vector_base & vector_base:: operator = (const vector_base & v1) { - assert( ( (v1.size() == size()) || (size() == 0) 
) - && bool("Incompatible vector sizes!")); +assert( ( (v1.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (size() == 0) +if (size() == 0) +{ + size_ = v1.size(); + if (size_ > 0) { - size_ = v1.size(); - if (size_ > 0) - { - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(v1)); - pad(); - } + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(v1)); + pad(); } +} - viennacl::linalg::convert(*this, v1); +viennacl::linalg::convert(*this, v1); - return *this; +return *this; } /** @brief Creates the vector from the supplied unit vector. */ -template -vector_base & vector_base::operator = (unit_vector const & v) +template +vector_base & vector_base::operator = (unit_vector const & v) { - assert( ( (v.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (v.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (size() == 0) +if (size() == 0) +{ + size_ = v.size(); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + if (size_ > 0) { - size_ = v.size(); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); - clear(); - } + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); + clear(); } - else - viennacl::linalg::vector_assign(*this, NumericT(0)); +} +else + viennacl::linalg::vector_assign(*this, NumericT(0)); - if (size_ > 0) - this->operator()(v.index()) = NumericT(1); +if (size_ > 0) + this->operator()(v.index()) = NumericT(1); - return *this; +return *this; } /** @brief Creates the vector 
from the supplied zero vector. */ -template -vector_base & vector_base::operator = (zero_vector const & v) +template +vector_base & vector_base::operator = (zero_vector const & v) { - assert( ( (v.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (v.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (size() == 0) +if (size() == 0) +{ + size_ = v.size(); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + if (size_ > 0) { - size_ = v.size(); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); - clear(); - } + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); + clear(); } - else - viennacl::linalg::vector_assign(*this, NumericT(0)); +} +else + viennacl::linalg::vector_assign(*this, NumericT(0)); - return *this; +return *this; } /** @brief Creates the vector from the supplied scalar vector. 
*/ -template -vector_base & vector_base::operator = (scalar_vector const & v) +template +vector_base & vector_base::operator = (scalar_vector const & v) { - assert( ( (v.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (v.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (size() == 0) +if (size() == 0) +{ + size_ = v.size(); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + if (size_ > 0) { - size_ = v.size(); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); - pad(); - } + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context()); + pad(); } +} - if (size_ > 0) - viennacl::linalg::vector_assign(*this, v[0]); +if (size_ > 0) + viennacl::linalg::vector_assign(*this, v[0]); - return *this; +return *this; } @@ -512,46 +512,46 @@ vector_base & vector_base -template -vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy) +template +vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy) { - assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed for v1 = A * v2: size1(A) != size(v1)")); +assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed for v1 = A * v2: size1(A) != size(v1)")); - // check for the special case x = A * x - if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this)) - { - viennacl::vector result(viennacl::traits::size1(proxy.lhs())); - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); - *this = result; - } - else - { - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); - } - return *this; +// check for the special 
case x = A * x +if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this)) +{ + viennacl::vector result(viennacl::traits::size1(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + *this = result; +} +else +{ + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); +} +return *this; } //transposed_matrix_proxy: -template -vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, - const vector_base, - op_prod> & proxy) +template +vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, + const vector_base, + op_prod> & proxy) { - assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed in v1 = trans(A) * v2: size2(A) != size(v1)")); +assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed in v1 = trans(A) * v2: size2(A) != size(v1)")); - // check for the special case x = trans(A) * x - if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this)) - { - viennacl::vector result(viennacl::traits::size1(proxy.lhs())); - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); - *this = result; - } - else - { - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); - } - return *this; +// check for the special case x = trans(A) * x +if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this)) +{ + viennacl::vector result(viennacl::traits::size1(proxy.lhs())); + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); + *this = result; +} +else +{ + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); +} +return *this; } ///////////////////////////// Matrix Vector interaction end /////////////////////////////////// @@ -560,40 +560,40 @@ vector_base & vector_base -entry_proxy vector_base::operator()(size_type index) +template +entry_proxy 
vector_base::operator()(size_type index) { - assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); - assert( index < size() && bool("Index out of bounds!") ); +assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); +assert( index < size() && bool("Index out of bounds!") ); - return entry_proxy(start_ + stride_ * index, elements_); +return entry_proxy(start_ + stride_ * index, elements_); } -template -entry_proxy vector_base::operator[](size_type index) +template +entry_proxy vector_base::operator[](size_type index) { - assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); - assert( index < size() && bool("Index out of bounds!") ); +assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); +assert( index < size() && bool("Index out of bounds!") ); - return entry_proxy(start_ + stride_ * index, elements_); +return entry_proxy(start_ + stride_ * index, elements_); } -template -const_entry_proxy vector_base::operator()(size_type index) const +template +const_entry_proxy vector_base::operator()(size_type index) const { - assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); - assert( index < size() && bool("Index out of bounds!") ); +assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); +assert( index < size() && bool("Index out of bounds!") ); - return const_entry_proxy(start_ + stride_ * index, elements_); +return const_entry_proxy(start_ + stride_ * index, elements_); } -template -const_entry_proxy vector_base::operator[](size_type index) const +template +const_entry_proxy vector_base::operator[](size_type index) const { - assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); - assert( index < size() && bool("Index out of bounds!") ); +assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); +assert( index < size() && bool("Index out of bounds!") ); - 
return const_entry_proxy(start_ + stride_ * index, elements_); +return const_entry_proxy(start_ + stride_ * index, elements_); } //////////////////////////// Read-write access to an element of the vector end /////////////////// @@ -602,236 +602,236 @@ const_entry_proxy vector_base::operator[]( // // Operator overloads with implicit conversion (thus cannot be made global without introducing additional headache) // -template -vector_base & vector_base::operator += (const self_type & vec) +template +vector_base & vector_base::operator += (const self_type & vec) { - assert(vec.size() == size() && bool("Incompatible vector sizes!")); +assert(vec.size() == size() && bool("Incompatible vector sizes!")); - if (size() > 0) - viennacl::linalg::avbv(*this, - *this, NumericT(1.0), 1, false, false, - vec, NumericT(1.0), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::avbv(*this, + *this, NumericT(1.0), 1, false, false, + vec, NumericT(1.0), 1, false, false); +return *this; } -template -vector_base & vector_base::operator -= (const self_type & vec) +template +vector_base & vector_base::operator -= (const self_type & vec) { - assert(vec.size() == size() && bool("Incompatible vector sizes!")); +assert(vec.size() == size() && bool("Incompatible vector sizes!")); - if (size() > 0) - viennacl::linalg::avbv(*this, - *this, NumericT(1.0), 1, false, false, - vec, NumericT(-1.0), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::avbv(*this, + *this, NumericT(1.0), 1, false, false, + vec, NumericT(-1.0), 1, false, false); +return *this; } /** @brief Scales a vector (or proxy) by a char (8-bit integer) value */ -template -vector_base & vector_base::operator *= (char val) +template +vector_base & vector_base::operator *= (char val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, false, false); +return *this; } /** 
@brief Scales a vector (or proxy) by a short integer value */ -template -vector_base & vector_base::operator *= (short val) +template +vector_base & vector_base::operator *= (short val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, false, false); +return *this; } /** @brief Scales a vector (or proxy) by an integer value */ -template -vector_base & vector_base::operator *= (int val) +template +vector_base & vector_base::operator *= (int val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, false, false); +return *this; } /** @brief Scales a vector (or proxy) by a long integer value */ -template -vector_base & vector_base::operator *= (long val) +template +vector_base & vector_base::operator *= (long val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, false, false); +return *this; } /** @brief Scales a vector (or proxy) by a single precision floating point value */ -template -vector_base & vector_base::operator *= (float val) +template +vector_base & vector_base::operator *= (float val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, false, false); +return *this; } /** @brief Scales a vector (or proxy) by a double precision floating point value */ -template -vector_base & vector_base::operator *= (double val) +template +vector_base & vector_base::operator *= (double val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, false, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, 
NumericT(val), 1, false, false); +return *this; } /** @brief Scales this vector by a char (8-bit) value */ -template -vector_base & vector_base::operator /= (char val) +template +vector_base & vector_base::operator /= (char val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, false); +return *this; } /** @brief Scales this vector by a short integer value */ -template -vector_base & vector_base::operator /= (short val) +template +vector_base & vector_base::operator /= (short val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, false); +return *this; } /** @brief Scales this vector by an integer value */ -template -vector_base & vector_base::operator /= (int val) +template +vector_base & vector_base::operator /= (int val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, false); +return *this; } /** @brief Scales this vector by a long integer value */ -template -vector_base & vector_base::operator /= (long val) +template +vector_base & vector_base::operator /= (long val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, false); +return *this; } /** @brief Scales this vector by a single precision floating point value */ -template -vector_base & vector_base::operator /= (float val) +template +vector_base & vector_base::operator /= (float val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, 
false); +return *this; } /** @brief Scales this vector by a double precision floating point value */ -template -vector_base & vector_base::operator /= (double val) +template +vector_base & vector_base::operator /= (double val) { - if (size() > 0) - viennacl::linalg::av(*this, - *this, NumericT(val), 1, true, false); - return *this; +if (size() > 0) + viennacl::linalg::av(*this, + *this, NumericT(val), 1, true, false); +return *this; } /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (char value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (char value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (short value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (short value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by an integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (int value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (int value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, 
NumericT(value)); } /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (long value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (long value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (float value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (float value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (double value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (double value) const { - return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (char value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (char value) const { - return 
vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (short value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (short value) const { - return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by an integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (int value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (int value) const { - return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (long value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (long value) const { - return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (float value) const +template +vector_expression< 
const vector_base, const NumericT, op_div> +vector_base::operator / (float value) const { - return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a double precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (double value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (double value) const { - return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); +return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Sign flip for the vector. Emulated to be equivalent to -1.0 * vector */ -template -vector_expression, const NumericT, op_mult> -vector_base::operator-() const +template +vector_expression, const NumericT, op_mult> +vector_base::operator-() const { - return vector_expression(*this, NumericT(-1.0)); +return vector_expression(*this, NumericT(-1.0)); } // @@ -839,88 +839,88 @@ vector_base::operator-() const // /** @brief Returns an iterator pointing to the beginning of the vector (STL like)*/ -template -typename vector_base::iterator vector_base::begin() +template +typename vector_base::iterator vector_base::begin() { - return iterator(*this, 0, start_, stride_); +return iterator(*this, 0, start_, stride_); } /** @brief Returns an iterator pointing to the end of the vector (STL like)*/ -template -typename vector_base::iterator vector_base::end() +template +typename vector_base::iterator vector_base::end() { - return iterator(*this, size(), start_, stride_); +return iterator(*this, size(), start_, stride_); } /** @brief Returns a const-iterator pointing to the beginning of the vector (STL like)*/ -template -typename vector_base::const_iterator 
vector_base::begin() const +template +typename vector_base::const_iterator vector_base::begin() const { - return const_iterator(*this, 0, start_, stride_); +return const_iterator(*this, 0, start_, stride_); } -template -typename vector_base::const_iterator vector_base::end() const +template +typename vector_base::const_iterator vector_base::end() const { - return const_iterator(*this, size(), start_, stride_); +return const_iterator(*this, size(), start_, stride_); } -template -vector_base & vector_base::swap(self_type & other) +template +vector_base & vector_base::swap(self_type & other) { - viennacl::linalg::vector_swap(*this, other); - return *this; +viennacl::linalg::vector_swap(*this, other); +return *this; } -template -void vector_base::clear() +template +void vector_base::clear() { - viennacl::linalg::vector_assign(*this, cpu_value_type(0.0), true); +viennacl::linalg::vector_assign(*this, cpu_value_type(0.0), true); } -template -vector_base & vector_base::fast_swap(self_type & other) +template +vector_base & vector_base::fast_swap(self_type & other) { - assert(this->size_ == other.size_ && bool("Vector size mismatch")); - this->elements_.swap(other.elements_); - return *this; +assert(this->size_ == other.size_ && bool("Vector size mismatch")); +this->elements_.swap(other.elements_); +return *this; } -template -void vector_base::pad() +template +void vector_base::pad() { - if (internal_size() != size()) - { - std::vector pad(internal_size() - size()); - viennacl::backend::memory_write(elements_, sizeof(NumericT) * size(), sizeof(NumericT) * pad.size(), &(pad[0])); - } +if (internal_size() != size()) +{ + std::vector pad(internal_size() - size()); + viennacl::backend::memory_write(elements_, sizeof(NumericT) * size(), sizeof(NumericT) * pad.size(), &(pad[0])); +} } -template -void vector_base::switch_memory_context(viennacl::context new_ctx) +template +void vector_base::switch_memory_context(viennacl::context new_ctx) { - 
viennacl::backend::switch_memory_context(elements_, new_ctx); +viennacl::backend::switch_memory_context(elements_, new_ctx); } //TODO: Think about implementing the following public member functions //void insert_element(unsigned int i, NumericT val){} //void erase_element(unsigned int i){} -template -void vector_base::resize(size_type new_size, bool preserve) +template +void vector_base::resize(size_type new_size, bool preserve) { - resize_impl(new_size, viennacl::traits::context(*this), preserve); +resize_impl(new_size, viennacl::traits::context(*this), preserve); } -template -void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve) +template +void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve) { - resize_impl(new_size, ctx, preserve); +resize_impl(new_size, ctx, preserve); } -template -void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve) +template +void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve) { assert(new_size > 0 && bool("Positive size required when resizing vector!")); @@ -948,11 +948,11 @@ void vector_base::resize_impl(size_type new_size, vi } -template -class vector : public vector_base +template +class vector : public vector_base { - typedef vector self_type; - typedef vector_base base_type; + typedef vector self_type; + typedef vector_base base_type; public: typedef typename base_type::size_type size_type; @@ -968,7 +968,7 @@ public: */ explicit vector(size_type vec_size) : base_type(vec_size) {} - explicit vector(size_type vec_size, viennacl::context ctx, bool use_mempool = false) : base_type(vec_size, ctx, use_mempool) {} + explicit vector(size_type vec_size, viennacl::context ctx) : base_type(vec_size, ctx) {} explicit vector(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, size_type start = 0, size_type stride = 1) : base_type(ptr_to_mem, mem_type, vec_size, start, stride) {} -- GitLab From 
4bca0f868b275f183f302a0575a6bc1fe40edd05 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 16 Dec 2018 16:42:50 -0600 Subject: [PATCH 21/46] more generalizations of the templating thigies --- viennacl/detail/vector_def.hpp | 4 ++-- viennacl/forwards.h | 4 ++-- viennacl/linalg/norm_2.hpp | 8 ++++---- viennacl/meta/result_of.hpp | 8 ++++---- viennacl/scalar.hpp | 6 +++--- viennacl/vector.hpp | 20 ++++++++++---------- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 4118975d..6c50003e 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -111,8 +111,8 @@ public: typedef viennacl::backend::mem_handle handle_type; typedef SizeT size_type; typedef DistanceT difference_type; - typedef const_vector_iterator const_iterator; - typedef vector_iterator iterator; + typedef const_vector_iterator const_iterator; + typedef vector_iterator iterator; /** @brief Returns the length of the vector (cf. 
std::vector) */ size_type size() const { return size_; } diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 3abad300..26ffdd15 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -270,10 +270,10 @@ namespace viennacl template class vector_expression; - template + template> class vector_iterator; - template> + template> class const_vector_iterator; template diff --git a/viennacl/linalg/norm_2.hpp b/viennacl/linalg/norm_2.hpp index babb2856..2f6e6829 100644 --- a/viennacl/linalg/norm_2.hpp +++ b/viennacl/linalg/norm_2.hpp @@ -105,11 +105,11 @@ namespace viennacl // ---------------------------------------------------- // VIENNACL // - template< typename ScalarType> - viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + template< typename ScalarType, typename H=viennacl::ocl::handle > + viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 > - norm_2(viennacl::vector_base const & v) + norm_2(viennacl::vector_base const & v) { //std::cout << "viennacl .. " << std::endl; return viennacl::scalar_expression< const viennacl::vector_base, diff --git a/viennacl/meta/result_of.hpp b/viennacl/meta/result_of.hpp index 32a0491a..328427c4 100644 --- a/viennacl/meta/result_of.hpp +++ b/viennacl/meta/result_of.hpp @@ -339,14 +339,14 @@ struct cpu_value_type typedef double type; }; -template -struct cpu_value_type > +template +struct cpu_value_type > { typedef T type; }; -template -struct cpu_value_type > +template +struct cpu_value_type > { typedef T type; }; diff --git a/viennacl/scalar.hpp b/viennacl/scalar.hpp index 5fdc5b00..9b08ca48 100644 --- a/viennacl/scalar.hpp +++ b/viennacl/scalar.hpp @@ -346,7 +346,7 @@ private: template class scalar { - typedef scalar self_type; + typedef scalar self_type; public: typedef viennacl::backend::mem_handle handle_type; typedef vcl_size_t size_type; @@ -411,7 +411,7 @@ public: } /** @brief Assigns a vector entry. 
*/ - self_type & operator= (entry_proxy const & other) + self_type & operator= (entry_proxy const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, other.index() * sizeof(NumericT), 0, sizeof(NumericT)); @@ -419,7 +419,7 @@ public: } /** @brief Assigns the value from another scalar. */ - self_type & operator= (scalar const & other) + self_type & operator= (scalar const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, 0, 0, sizeof(NumericT)); diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 074faec1..a4d528d1 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -107,7 +107,7 @@ private: template class const_vector_iterator { - typedef const_vector_iterator self_type; + typedef const_vector_iterator self_type; public: typedef scalar value_type; typedef vcl_size_t size_type; @@ -122,7 +122,7 @@ public: * @param start First index of the element in the vector pointed to be the iterator (for vector_range and vector_slice) * @param stride Stride for the support of vector_slice */ - const_vector_iterator(vector_base const & vec, + const_vector_iterator(vector_base const & vec, size_type index, size_type start = 0, size_type stride = 1) : elements_(vec.handle()), index_(index), start_(start), stride_(stride) {} @@ -142,7 +142,7 @@ public: value_type operator*(void) const { value_type result; - result = const_entry_proxy(start_ + index_ * stride(), elements_); + result = const_entry_proxy(start_ + index_ * stride(), elements_); return result; } self_type operator++(void) { ++index_; return *this; } @@ -201,11 +201,11 @@ protected: * @tparam NumericT The underlying floating point type (either float or double) * @tparam AlignmentV Alignment of the underlying vector, @see vector */ -template -class vector_iterator : public const_vector_iterator +template +class vector_iterator : public const_vector_iterator { - typedef 
const_vector_iterator base_type; - typedef vector_iterator self_type; + typedef const_vector_iterator base_type; + typedef vector_iterator self_type; public: typedef typename base_type::handle_type handle_type; typedef typename base_type::size_type size_type; @@ -221,15 +221,15 @@ public: * @param start Offset from the beginning of the underlying vector (for ranges and slices) * @param stride Stride for slices */ - vector_iterator(vector_base & vec, + vector_iterator(vector_base & vec, size_type index, size_type start = 0, size_type stride = 1) : base_type(vec, index, start, stride), elements_(vec.handle()) {} //vector_iterator(base_type const & b) : base_type(b) {} - entry_proxy operator*(void) + entry_proxy operator*(void) { - return entry_proxy(base_type::start_ + base_type::index_ * base_type::stride(), elements_); + return entry_proxy(base_type::start_ + base_type::index_ * base_type::stride(), elements_); } difference_type operator-(self_type const & other) const { difference_type result = base_type::index_; return (result - static_cast(other.index_)); } -- GitLab From d99ad527df978e6ebe93aaf136162f0fe4c19a11 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 16 Dec 2018 17:10:00 -0600 Subject: [PATCH 22/46] dont track ctags files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b86d6651..a0f9b24f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ build .*.swp +tags -- GitLab From 49268b61074af819fac17cb79f40712d0efcc290 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sun, 16 Dec 2018 18:01:17 -0600 Subject: [PATCH 23/46] more changes to adjust to the template parameter for the memory handle --- viennacl/forwards.h | 4 +- .../linalg/host_based/vector_operations.hpp | 4 +- viennacl/linalg/norm_2.hpp | 4 +- viennacl/linalg/opencl/vector_operations.hpp | 14 +++---- viennacl/linalg/vector_operations.hpp | 4 +- viennacl/traits/handle.hpp | 16 +++++--- viennacl/traits/stride.hpp | 6 +-- 
viennacl/vector.hpp | 37 ++++++++++--------- 8 files changed, 48 insertions(+), 41 deletions(-) diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 26ffdd15..666270cb 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -745,8 +745,8 @@ namespace viennacl void norm_2_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_2_cpu(vector_base const & vec, T & result); + template> + void norm_2_cpu(vector_base const & vec, T & result); template void norm_2_cpu(viennacl::vector_expression const & vec, diff --git a/viennacl/linalg/host_based/vector_operations.hpp b/viennacl/linalg/host_based/vector_operations.hpp index 01d87166..9180e4c9 100644 --- a/viennacl/linalg/host_based/vector_operations.hpp +++ b/viennacl/linalg/host_based/vector_operations.hpp @@ -271,8 +271,8 @@ void avbv_v(vector_base & vec1, * @param alpha The value to be assigned * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). */ -template -void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) +template +void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) { typedef NumericT value_type; diff --git a/viennacl/linalg/norm_2.hpp b/viennacl/linalg/norm_2.hpp index 2f6e6829..713b64b6 100644 --- a/viennacl/linalg/norm_2.hpp +++ b/viennacl/linalg/norm_2.hpp @@ -112,8 +112,8 @@ namespace viennacl norm_2(viennacl::vector_base const & v) { //std::cout << "viennacl .. 
" << std::endl; - return viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + return viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 >(v, v); } diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 5d016eff..44b45de0 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -242,8 +242,8 @@ void avbv_v(vector_base & vec1, * @param alpha The value to be assigned * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). */ -template -void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) +template +void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); viennacl::linalg::opencl::kernels::vector::init(ctx); @@ -788,9 +788,9 @@ void inner_prod_cpu(vector_base const & vec1, * @param partial_result The result scalar * @param norm_id Norm selector. 0: norm_inf, 1: norm_1, 2: norm_2 */ -template -void norm_reduction_impl(vector_base const & vec, - vector_base & partial_result, +template +void norm_reduction_impl(vector_base const & vec, + vector_base & partial_result, cl_uint norm_id) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -918,8 +918,8 @@ void norm_2_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_2_cpu(vector_base const & vec, +template +void norm_2_cpu(vector_base const & vec, T & result) { std::cout << "norm_2_kernel asked for a vector.\n"; diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index 5add9cea..e7c17a4a 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -183,8 +183,8 @@ namespace viennacl * @param alpha The value to be assigned * @param up_to_internal_size Whether 'alpha' should be written to padded memory as well. This is used for setting all entries to zero, including padded memory. */ - template - void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) + template + void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { diff --git a/viennacl/traits/handle.hpp b/viennacl/traits/handle.hpp index cef56d61..df1fb9f5 100644 --- a/viennacl/traits/handle.hpp +++ b/viennacl/traits/handle.hpp @@ -37,15 +37,15 @@ namespace traits // Generic memory handle // /** @brief Returns the generic memory handle of an object. Non-const version. */ -template> -viennacl::backend::mem_handle & handle(T & obj) +template +viennacl::backend::mem_handle<> & handle(T & obj) { return obj.handle(); } /** @brief Returns the generic memory handle of an object. Const-version. */ -template> -viennacl::backend::mem_handle const & handle(T const & obj) +template +viennacl::backend::mem_handle<> const & handle(T const & obj) { return obj.handle(); } @@ -197,7 +197,7 @@ H & opencl_handle(T & obj) } /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Const version. 
*/ -template> +template H const & opencl_handle(T const & obj) { return viennacl::traits::handle(obj).opencl_handle(); @@ -222,6 +222,12 @@ viennacl::ocl::handle const & opencl_handle(viennacl::vector_expression< return viennacl::traits::handle(obj.rhs()).opencl_handle(); } +template +viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_expression, op_prod> const & obj) +{ + return viennacl::traits::handle(obj.rhs()).opencl_handle(); +} + template viennacl::ocl::context & opencl_context(T const & obj) { diff --git a/viennacl/traits/stride.hpp b/viennacl/traits/stride.hpp index 68c46814..40f1eb52 100644 --- a/viennacl/traits/stride.hpp +++ b/viennacl/traits/stride.hpp @@ -40,9 +40,9 @@ namespace traits // // inc: Increment for vectors. Defaults to 1 // -template -typename result_of::size_type< viennacl::vector_base >::type -stride(viennacl::vector_base const & s) { return s.stride(); } +template +typename result_of::size_type< viennacl::vector_base >::type +stride(viennacl::vector_base const & s) { return s.stride(); } // // inc1: Row increment for matrices. Defaults to 1 diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index a4d528d1..b716b54f 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -1232,9 +1232,9 @@ vector_tuple tie(vector_base & v0, * @param gpu_end GPU iterator pointing to the end of the vector (STL-like) * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector! */ -template -void fast_copy(const const_vector_iterator & gpu_begin, - const const_vector_iterator & gpu_end, +template +void fast_copy(const const_vector_iterator & gpu_begin, + const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ) { if (gpu_begin != gpu_end) @@ -1282,9 +1282,9 @@ void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec ) * @param gpu_end GPU iterator pointing to the end of the vector (STL-like) * @param cpu_begin Output iterator for the cpu vector. 
The cpu vector must be at least as long as the gpu vector! */ -template -void async_copy(const const_vector_iterator & gpu_begin, - const const_vector_iterator & gpu_end, +template +void async_copy(const const_vector_iterator & gpu_begin, + const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ) { if (gpu_begin != gpu_end) @@ -1320,9 +1320,9 @@ void async_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec ) * @param gpu_end GPU constant iterator pointing to the end of the vector (STL-like) * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector! */ -template -void copy(const const_vector_iterator & gpu_begin, - const const_vector_iterator & gpu_end, +template +void copy(const const_vector_iterator & gpu_begin, + const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ) { assert(gpu_end - gpu_begin >= 0 && bool("Iterators incompatible")); @@ -1342,14 +1342,14 @@ void copy(const const_vector_iterator & gpu_begin, * @param gpu_end GPU iterator pointing to the end of the vector (STL-like) * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector! */ -template -void copy(const vector_iterator & gpu_begin, - const vector_iterator & gpu_end, +template +void copy(const vector_iterator & gpu_begin, + const vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ) { - viennacl::copy(const_vector_iterator(gpu_begin), - const_vector_iterator(gpu_end), + viennacl::copy(const_vector_iterator(gpu_begin), + const_vector_iterator(gpu_end), cpu_begin); } @@ -1398,10 +1398,10 @@ void copy(vector const & gpu_vec, * @param cpu_end CPU iterator pointing to the end of the vector (STL-like) * @param gpu_begin Output iterator for the gpu vector. The gpu iterator must be incrementable (cpu_end - cpu_begin) times, otherwise the result is undefined. 
*/ -template +template void fast_copy(CPU_ITERATOR const & cpu_begin, CPU_ITERATOR const & cpu_end, - vector_iterator gpu_begin) + vector_iterator gpu_begin) { if (cpu_end - cpu_begin > 0) { @@ -1432,8 +1432,9 @@ void fast_copy(CPU_ITERATOR const & cpu_begin, * @param cpu_vec A cpu vector. Type requirements: Iterator can be obtained via member function .begin() and .end() * @param gpu_vec The gpu vector. */ -template -void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec) +template +void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec) { viennacl::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin()); } -- GitLab From f191ef0570ea21c408de5e7fdf43e0f242016712 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 22 Dec 2018 19:15:27 -0600 Subject: [PATCH 24/46] changes the ordering of the template parameters of vector_base --- viennacl/detail/vector_def.hpp | 4 +- viennacl/forwards.h | 4 +- viennacl/vector.hpp | 262 ++++++++++++++++----------------- 3 files changed, 135 insertions(+), 135 deletions(-) diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 6c50003e..92ed546c 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -100,10 +100,10 @@ struct zero_vector : public scalar_vector * * @tparam NumericT The floating point type, either 'float' or 'double' */ -template +template class vector_base { - typedef vector_base self_type; + typedef vector_base self_type; public: typedef scalar value_type; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 666270cb..85fe0aca 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -291,7 +291,7 @@ namespace viennacl template struct scalar_vector; - template> + template, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> class vector_base; template> @@ -746,7 +746,7 @@ namespace viennacl scalar & result); template> - void norm_2_cpu(vector_base const & vec, T & result); + void norm_2_cpu(vector_base 
const & vec, T & result); template void norm_2_cpu(viennacl::vector_expression const & vec, diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index b716b54f..5df23e68 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -247,16 +247,16 @@ private: }; -template -vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a vector might have been created globally before the backend has become available */ } +template +vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a vector might have been created globally before the backend has become available */ } -template -vector_base::vector_base(viennacl::backend::mem_handle & h, +template +vector_base::vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride) : size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} -template -vector_base::vector_base(size_type vec_size, viennacl::context ctx) +template +vector_base::vector_base(size_type vec_size, viennacl::context ctx) : size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { if (size_ > 0) @@ -269,8 +269,8 @@ if (size_ > 0) // CUDA or host memory: -template -vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) +template +vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) { if (mem_type == viennacl::CUDA_MEMORY) @@ -295,8 +295,8 @@ elements_.raw_size(sizeof(NumericT) * vec_size); } #ifdef VIENNACL_WITH_OPENCL -template -vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type 
stride, viennacl::context ctx) +template +vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type stride, viennacl::context ctx) : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) { elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); @@ -308,9 +308,9 @@ elements_.raw_size(sizeof(NumericT) * vec_size); #endif -template +template template -vector_base::vector_base(vector_expression const & proxy) +vector_base::vector_base(vector_expression const & proxy) : size_(viennacl::traits::size(proxy)), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { if (size_ > 0) @@ -322,8 +322,8 @@ self_type::operator=(proxy); } // Copy CTOR: -template -vector_base::vector_base(const vector_base & other) : +template +vector_base::vector_base(const vector_base & other) : size_(other.size_), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(other.size_, dense_padding_size)) { @@ -337,9 +337,9 @@ if (internal_size() > 0) } // Conversion CTOR: -template +template template -vector_base::vector_base(const vector_base & other) : +vector_base::vector_base(const vector_base & other) : size_(other.size()), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(other.size(), dense_padding_size)) { @@ -354,8 +354,8 @@ if (internal_size() > 0) -template -vector_base & vector_base::operator=(const self_type & vec) +template +vector_base & vector_base::operator=(const self_type & vec) { assert( ( (vec.size() == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -386,9 +386,9 @@ return *this; * * @param proxy An expression template proxy class. 
*/ -template +template template -vector_base & vector_base::operator=(const vector_expression & proxy) +vector_base & vector_base::operator=(const vector_expression & proxy) { assert( ( (viennacl::traits::size(proxy) == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -408,9 +408,9 @@ return *this; } // convert from vector with other numeric type -template +template template -vector_base & vector_base:: operator = (const vector_base & v1) +vector_base & vector_base:: operator = (const vector_base & v1) { assert( ( (v1.size() == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -432,8 +432,8 @@ return *this; } /** @brief Creates the vector from the supplied unit vector. */ -template -vector_base & vector_base::operator = (unit_vector const & v) +template +vector_base & vector_base::operator = (unit_vector const & v) { assert( ( (v.size() == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -458,8 +458,8 @@ return *this; } /** @brief Creates the vector from the supplied zero vector. */ -template -vector_base & vector_base::operator = (zero_vector const & v) +template +vector_base & vector_base::operator = (zero_vector const & v) { assert( ( (v.size() == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -481,8 +481,8 @@ return *this; } /** @brief Creates the vector from the supplied scalar vector. 
*/ -template -vector_base & vector_base::operator = (scalar_vector const & v) +template +vector_base & vector_base::operator = (scalar_vector const & v) { assert( ( (v.size() == size()) || (size() == 0) ) && bool("Incompatible vector sizes!")); @@ -512,8 +512,8 @@ return *this; //This is certainly not the nicest approach and will most likely by changed in the future, but it works :-) //matrix<> -template -vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy) +template +vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy) { assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed for v1 = A * v2: size1(A) != size(v1)")); @@ -533,8 +533,8 @@ return *this; //transposed_matrix_proxy: -template -vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, +template +vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >, const vector_base, op_prod> & proxy) { @@ -560,8 +560,8 @@ return *this; //////////////////////////// Read-write access to an element of the vector start /////////////////// //read-write access to an element of the vector -template -entry_proxy vector_base::operator()(size_type index) +template +entry_proxy vector_base::operator()(size_type index) { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); @@ -569,8 +569,8 @@ assert( index < size() && bool("Index out of bounds!") ); return entry_proxy(start_ + stride_ * index, elements_); } -template -entry_proxy vector_base::operator[](size_type index) +template +entry_proxy vector_base::operator[](size_type index) { assert( (size() > 0) && bool("Cannot apply operator() to 
vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); @@ -578,8 +578,8 @@ assert( index < size() && bool("Index out of bounds!") ); return entry_proxy(start_ + stride_ * index, elements_); } -template -const_entry_proxy vector_base::operator()(size_type index) const +template +const_entry_proxy vector_base::operator()(size_type index) const { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); @@ -587,8 +587,8 @@ assert( index < size() && bool("Index out of bounds!") ); return const_entry_proxy(start_ + stride_ * index, elements_); } -template -const_entry_proxy vector_base::operator[](size_type index) const +template +const_entry_proxy vector_base::operator[](size_type index) const { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); @@ -602,8 +602,8 @@ return const_entry_proxy(start_ + stride_ * index, elements_); // // Operator overloads with implicit conversion (thus cannot be made global without introducing additional headache) // -template -vector_base & vector_base::operator += (const self_type & vec) +template +vector_base & vector_base::operator += (const self_type & vec) { assert(vec.size() == size() && bool("Incompatible vector sizes!")); @@ -614,8 +614,8 @@ if (size() > 0) return *this; } -template -vector_base & vector_base::operator -= (const self_type & vec) +template +vector_base & vector_base::operator -= (const self_type & vec) { assert(vec.size() == size() && bool("Incompatible vector sizes!")); @@ -627,8 +627,8 @@ return *this; } /** @brief Scales a vector (or proxy) by a char (8-bit integer) value */ -template -vector_base & vector_base::operator *= (char val) +template +vector_base & vector_base::operator *= (char val) { if (size() > 0) viennacl::linalg::av(*this, @@ -636,8 +636,8 @@ if (size() > 0) return *this; } /** @brief Scales a vector 
(or proxy) by a short integer value */ -template -vector_base & vector_base::operator *= (short val) +template +vector_base & vector_base::operator *= (short val) { if (size() > 0) viennacl::linalg::av(*this, @@ -645,8 +645,8 @@ if (size() > 0) return *this; } /** @brief Scales a vector (or proxy) by an integer value */ -template -vector_base & vector_base::operator *= (int val) +template +vector_base & vector_base::operator *= (int val) { if (size() > 0) viennacl::linalg::av(*this, @@ -654,8 +654,8 @@ if (size() > 0) return *this; } /** @brief Scales a vector (or proxy) by a long integer value */ -template -vector_base & vector_base::operator *= (long val) +template +vector_base & vector_base::operator *= (long val) { if (size() > 0) viennacl::linalg::av(*this, @@ -663,8 +663,8 @@ if (size() > 0) return *this; } /** @brief Scales a vector (or proxy) by a single precision floating point value */ -template -vector_base & vector_base::operator *= (float val) +template +vector_base & vector_base::operator *= (float val) { if (size() > 0) viennacl::linalg::av(*this, @@ -672,8 +672,8 @@ if (size() > 0) return *this; } /** @brief Scales a vector (or proxy) by a double precision floating point value */ -template -vector_base & vector_base::operator *= (double val) +template +vector_base & vector_base::operator *= (double val) { if (size() > 0) viennacl::linalg::av(*this, @@ -683,8 +683,8 @@ return *this; /** @brief Scales this vector by a char (8-bit) value */ -template -vector_base & vector_base::operator /= (char val) +template +vector_base & vector_base::operator /= (char val) { if (size() > 0) viennacl::linalg::av(*this, @@ -692,8 +692,8 @@ if (size() > 0) return *this; } /** @brief Scales this vector by a short integer value */ -template -vector_base & vector_base::operator /= (short val) +template +vector_base & vector_base::operator /= (short val) { if (size() > 0) viennacl::linalg::av(*this, @@ -701,8 +701,8 @@ if (size() > 0) return *this; } /** @brief Scales 
this vector by an integer value */ -template -vector_base & vector_base::operator /= (int val) +template +vector_base & vector_base::operator /= (int val) { if (size() > 0) viennacl::linalg::av(*this, @@ -710,8 +710,8 @@ if (size() > 0) return *this; } /** @brief Scales this vector by a long integer value */ -template -vector_base & vector_base::operator /= (long val) +template +vector_base & vector_base::operator /= (long val) { if (size() > 0) viennacl::linalg::av(*this, @@ -719,8 +719,8 @@ if (size() > 0) return *this; } /** @brief Scales this vector by a single precision floating point value */ -template -vector_base & vector_base::operator /= (float val) +template +vector_base & vector_base::operator /= (float val) { if (size() > 0) viennacl::linalg::av(*this, @@ -728,8 +728,8 @@ if (size() > 0) return *this; } /** @brief Scales this vector by a double precision floating point value */ -template -vector_base & vector_base::operator /= (double val) +template +vector_base & vector_base::operator /= (double val) { if (size() > 0) viennacl::linalg::av(*this, @@ -739,97 +739,97 @@ return *this; /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (char value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (char value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (short value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (short value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the 
vector by an integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (int value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (int value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (long value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (long value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (float value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (float value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_mult> -vector_base::operator * (double value) const +template +vector_expression< const vector_base, const NumericT, op_mult> +vector_base::operator * (double value) const { return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value)); } /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (char value) const +template +vector_expression< const vector_base, 
const NumericT, op_div> +vector_base::operator / (char value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (short value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (short value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by an integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (int value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (int value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (long value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (long value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */ -template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (float value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (float value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Scales the vector by a double precision floating point number 'alpha' and returns an expression template */ 
-template -vector_expression< const vector_base, const NumericT, op_div> -vector_base::operator / (double value) const +template +vector_expression< const vector_base, const NumericT, op_div> +vector_base::operator / (double value) const { return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value)); } /** @brief Sign flip for the vector. Emulated to be equivalent to -1.0 * vector */ -template -vector_expression, const NumericT, op_mult> -vector_base::operator-() const +template +vector_expression, const NumericT, op_mult> +vector_base::operator-() const { return vector_expression(*this, NumericT(-1.0)); } @@ -839,56 +839,56 @@ return vector_expression(*this, Numeri // /** @brief Returns an iterator pointing to the beginning of the vector (STL like)*/ -template -typename vector_base::iterator vector_base::begin() +template +typename vector_base::iterator vector_base::begin() { return iterator(*this, 0, start_, stride_); } /** @brief Returns an iterator pointing to the end of the vector (STL like)*/ -template -typename vector_base::iterator vector_base::end() +template +typename vector_base::iterator vector_base::end() { return iterator(*this, size(), start_, stride_); } /** @brief Returns a const-iterator pointing to the beginning of the vector (STL like)*/ -template -typename vector_base::const_iterator vector_base::begin() const +template +typename vector_base::const_iterator vector_base::begin() const { return const_iterator(*this, 0, start_, stride_); } -template -typename vector_base::const_iterator vector_base::end() const +template +typename vector_base::const_iterator vector_base::end() const { return const_iterator(*this, size(), start_, stride_); } -template -vector_base & vector_base::swap(self_type & other) +template +vector_base & vector_base::swap(self_type & other) { viennacl::linalg::vector_swap(*this, other); return *this; } -template -void vector_base::clear() +template +void vector_base::clear() { 
viennacl::linalg::vector_assign(*this, cpu_value_type(0.0), true); } -template -vector_base & vector_base::fast_swap(self_type & other) +template +vector_base & vector_base::fast_swap(self_type & other) { assert(this->size_ == other.size_ && bool("Vector size mismatch")); this->elements_.swap(other.elements_); return *this; } -template -void vector_base::pad() +template +void vector_base::pad() { if (internal_size() != size()) { @@ -897,8 +897,8 @@ if (internal_size() != size()) } } -template -void vector_base::switch_memory_context(viennacl::context new_ctx) +template +void vector_base::switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(elements_, new_ctx); } @@ -907,20 +907,20 @@ viennacl::backend::switch_memory_context(elements_, new_ctx); //void insert_element(unsigned int i, NumericT val){} //void erase_element(unsigned int i){} -template -void vector_base::resize(size_type new_size, bool preserve) +template +void vector_base::resize(size_type new_size, bool preserve) { resize_impl(new_size, viennacl::traits::context(*this), preserve); } -template -void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve) +template +void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve) { resize_impl(new_size, ctx, preserve); } -template -void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve) +template +void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve) { assert(new_size > 0 && bool("Positive size required when resizing vector!")); -- GitLab From 95e6e1be85eecb88bc697d3082d0c15378461970 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 26 Dec 2018 12:25:17 -0600 Subject: [PATCH 25/46] 5% of changes to handle the new handle. Needs a lot more work for this to actually work. 
--- viennacl/forwards.h | 4 +- .../linalg/host_based/vector_operations.hpp | 6 +- viennacl/linalg/norm_2.hpp | 10 +-- viennacl/linalg/opencl/vector_operations.hpp | 10 +-- viennacl/linalg/vector_operations.hpp | 6 +- viennacl/meta/result_of.hpp | 10 +-- viennacl/ocl/context.hpp | 83 +----------------- viennacl/ocl/forwards.h | 3 +- viennacl/ocl/handle.hpp | 87 ++++++++++++++++++- viennacl/ocl/kernel.hpp | 37 +++++--- viennacl/ocl/program.hpp | 1 - viennacl/traits/handle.hpp | 28 ++++-- viennacl/traits/start.hpp | 6 +- viennacl/traits/stride.hpp | 4 +- viennacl/vector.hpp | 17 ++-- 15 files changed, 167 insertions(+), 145 deletions(-) diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 85fe0aca..fe0eae61 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -745,8 +745,8 @@ namespace viennacl void norm_2_impl(viennacl::vector_expression const & vec, scalar & result); - template> - void norm_2_cpu(vector_base const & vec, T & result); + template + void norm_2_cpu(vector_base const & vec, T & result); template void norm_2_cpu(viennacl::vector_expression const & vec, diff --git a/viennacl/linalg/host_based/vector_operations.hpp b/viennacl/linalg/host_based/vector_operations.hpp index 9180e4c9..c7fe386e 100644 --- a/viennacl/linalg/host_based/vector_operations.hpp +++ b/viennacl/linalg/host_based/vector_operations.hpp @@ -272,7 +272,7 @@ void avbv_v(vector_base & vec1, * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). 
*/ template -void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) +void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) { typedef NumericT value_type; @@ -817,8 +817,8 @@ VIENNACL_NORM_2_IMPL_2(double, double) * @param vec1 The vector * @param result The result scalar */ -template -void norm_2_impl(vector_base const & vec1, +template +void norm_2_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; diff --git a/viennacl/linalg/norm_2.hpp b/viennacl/linalg/norm_2.hpp index 713b64b6..26a2da01 100644 --- a/viennacl/linalg/norm_2.hpp +++ b/viennacl/linalg/norm_2.hpp @@ -106,14 +106,14 @@ namespace viennacl // VIENNACL // template< typename ScalarType, typename H=viennacl::ocl::handle > - viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 > - norm_2(viennacl::vector_base const & v) + norm_2(viennacl::vector_base const & v) { //std::cout << "viennacl .. " << std::endl; - return viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + return viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 >(v, v); } diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 44b45de0..324a31a7 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -243,7 +243,7 @@ void avbv_v(vector_base & vec1, * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). 
*/ template -void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) +void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); viennacl::linalg::opencl::kernels::vector::init(ctx); @@ -789,8 +789,8 @@ void inner_prod_cpu(vector_base const & vec1, * @param norm_id Norm selector. 0: norm_inf, 1: norm_1, 2: norm_2 */ template -void norm_reduction_impl(vector_base const & vec, - vector_base & partial_result, +void norm_reduction_impl(vector_base const & vec, + vector_base & partial_result, cl_uint norm_id) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -801,7 +801,7 @@ void norm_reduction_impl(vector_base const & v viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "norm"); assert( (k.global_work_size() / k.local_work_size() <= partial_result.size()) && bool("Size mismatch for partial reduction in norm_reduction_impl()") ); - std::cout << "Computing norm of " << viennacl::traits::opencl_handle(vec) << std::endl; + std::cout << "Computing norm of " << viennacl::traits::opencl_handle(vec).get() << std::endl; viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec), cl_uint(viennacl::traits::start(vec)), @@ -919,7 +919,7 @@ void norm_2_impl(vector_base const & vec, * @param result The result scalar */ template -void norm_2_cpu(vector_base const & vec, +void norm_2_cpu(vector_base const & vec, T & result) { std::cout << "norm_2_kernel asked for a vector.\n"; diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index e7c17a4a..238ecdcc 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -184,7 +184,7 @@ 
namespace viennacl * @param up_to_internal_size Whether 'alpha' should be written to padded memory as well. This is used for setting all entries to zero, including padded memory. */ template - void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) + void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -808,8 +808,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_2_cpu(vector_base const & vec, + template + void norm_2_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) diff --git a/viennacl/meta/result_of.hpp b/viennacl/meta/result_of.hpp index 328427c4..67cb5384 100644 --- a/viennacl/meta/result_of.hpp +++ b/viennacl/meta/result_of.hpp @@ -79,8 +79,8 @@ struct alignment enum { value = alignment::value }; }; -template -struct alignment< vector > +template +struct alignment< vector > { enum { value = AlignmentV }; }; @@ -142,8 +142,8 @@ struct size_type }; /** \cond */ -template -struct size_type< vector_base > +template +struct size_type< vector_base > { typedef SizeType type; }; @@ -346,7 +346,7 @@ struct cpu_value_type > }; template -struct cpu_value_type > +struct cpu_value_type > { typedef T type; }; diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 58f6a17d..2f2bf961 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -43,8 +43,8 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" -#include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/kernel.hpp" +#include "viennacl/ocl/handle.hpp" #include "viennacl/ocl/program.hpp" #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/platform.hpp" @@ -141,87 +141,6 @@ namespace ocl // }}} - // {{{ pooled handle - // - - class pooled_clmem_handle: public handle - { - protected: - typedef handle super; 
- - public: - pooled_clmem_handle() : super(), m_size(0), m_ref(0) {} - pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s, uint32_t _r=1) : super(something, c), m_size(_s), m_ref(_r) - {if(h_!=0) - { - inc(); - cl_int err = clRetainMemObject(something); - VIENNACL_ERR_CHECK(err); - } - } - pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size), m_ref(other.m_ref) - { - if(h_!=0) - inc(); - } - - pooled_clmem_handle & operator=(const pooled_clmem_handle & other) - { - if (h_ != 0) - dec(); - h_ = other.h_; - p_context_ = other.p_context_; - m_size = other.m_size; - m_ref = other.m_ref; - inc(); - return *this; - } - - pooled_clmem_handle & operator=(const cl_mem & something) - { - std::cerr << "[pooled_handle]: Pooled handle needs to know about size\n"; - throw std::exception(); - return *this; - } - - /** @brief Swaps the OpenCL handle of two handle objects */ - pooled_clmem_handle & swap(pooled_clmem_handle & other) - { - cl_mem tmp = other.h_; - other.h_ = this->h_; - this->h_ = tmp; - - viennacl::ocl::context const * tmp2 = other.p_context_; - other.p_context_ = this->p_context_; - this->p_context_ = tmp2; - - size_t tmp3 = other.m_size; - other.m_size = this->m_size; - this->m_size = tmp3; - - uint32_t tmp4 = other.m_ref; - other.m_ref = this->m_ref; - this->m_ref = tmp4; - - return *this; - } - - void inc() - { - cl_int err = clRetainMemObject(h_); - VIENNACL_ERR_CHECK(err); - std::cout << "[pooled_handle]: Incrementing counter." 
<< std::endl; - ++m_ref; - } - inline virtual void dec(); - virtual ~pooled_clmem_handle() { - if (h_!=0) dec(); - } - - private: - size_t m_size; - uint32_t m_ref; - }; // }}} diff --git a/viennacl/ocl/forwards.h b/viennacl/ocl/forwards.h index 56ba898f..091feeb2 100644 --- a/viennacl/ocl/forwards.h +++ b/viennacl/ocl/forwards.h @@ -41,8 +41,7 @@ namespace viennacl /** @brief A tag denoting the default OpenCL device type (SDK-specific) */ struct default_tag {}; - - template + template class handle; class kernel; diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 8b0f1a10..2dbc8b70 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -33,6 +33,7 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" +#include "viennacl/forwards.h" namespace viennacl { @@ -192,12 +193,12 @@ namespace viennacl const OCL_TYPE & get() const { return h_; } - viennacl::ocl::context const & context() const + virtual viennacl::ocl::context const & context() const { assert(p_context_ != NULL && bool("Logic error: Accessing dangling context from handle.")); return *p_context_; } - void context(viennacl::ocl::context const & c) { p_context_ = &c; } + virtual void context(viennacl::ocl::context const & c) { p_context_ = &c; } /** @brief Swaps the OpenCL handle of two handle objects */ @@ -224,6 +225,88 @@ namespace viennacl viennacl::ocl::context const * p_context_; }; + // {{{ pooled handle + // + + class pooled_clmem_handle: public handle + { + protected: + typedef handle super; + + public: + pooled_clmem_handle() : super(), m_size(0), m_ref(0) {} + pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s, uint32_t _r=1) : super(something, c), m_size(_s), m_ref(_r) + {if(h_!=0) + { + inc(); + cl_int err = clRetainMemObject(something); + VIENNACL_ERR_CHECK(err); + } + } + pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size), m_ref(other.m_ref) + { + 
if(h_!=0) + inc(); + } + + pooled_clmem_handle & operator=(const pooled_clmem_handle & other) + { + if (h_ != 0) + dec(); + h_ = other.h_; + p_context_ = other.p_context_; + m_size = other.m_size; + m_ref = other.m_ref; + inc(); + return *this; + } + + pooled_clmem_handle & operator=(const cl_mem & something) + { + std::cerr << "[pooled_handle]: Pooled handle needs to know about size\n"; + throw std::exception(); + return *this; + } + + /** @brief Swaps the OpenCL handle of two handle objects */ + pooled_clmem_handle & swap(pooled_clmem_handle & other) + { + cl_mem tmp = other.h_; + other.h_ = this->h_; + this->h_ = tmp; + + viennacl::ocl::context const * tmp2 = other.p_context_; + other.p_context_ = this->p_context_; + this->p_context_ = tmp2; + + size_t tmp3 = other.m_size; + other.m_size = this->m_size; + this->m_size = tmp3; + + uint32_t tmp4 = other.m_ref; + other.m_ref = this->m_ref; + this->m_ref = tmp4; + + return *this; + } + + void inc() + { + cl_int err = clRetainMemObject(h_); + VIENNACL_ERR_CHECK(err); + std::cout << "[pooled_handle]: Incrementing counter." 
<< std::endl; + ++m_ref; + } + virtual void dec(); + virtual ~pooled_clmem_handle() { + if (h_!=0) dec(); + } + + private: + size_t m_size; + uint32_t m_ref; + }; + } //namespace ocl } //namespace viennacl diff --git a/viennacl/ocl/kernel.hpp b/viennacl/ocl/kernel.hpp index 5f2cab13..900e6d80 100644 --- a/viennacl/ocl/kernel.hpp +++ b/viennacl/ocl/kernel.hpp @@ -223,6 +223,30 @@ namespace viennacl VIENNACL_ERR_CHECK(err); } + + //forward handles directly: + /** @brief Sets an OpenCL object at the provided position */ + template + void arg(unsigned int pos, viennacl::ocl::handle const & h) + { + CL_TYPE temp = h.get(); + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) + std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; + #endif + cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp); + VIENNACL_ERR_CHECK(err); + } + + void arg(unsigned int pos, viennacl::ocl::pooled_clmem_handle const & h) + { + cl_mem temp = h.get(); + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) + std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; + #endif + cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_mem), (void*)&temp); + VIENNACL_ERR_CHECK(err); + } + //generic handling: call .handle() member /** @brief Sets an OpenCL memory object at the provided position */ template @@ -238,19 +262,6 @@ namespace viennacl VIENNACL_ERR_CHECK(err); } - //forward handles directly: - /** @brief Sets an OpenCL object at the provided position */ - template - void arg(unsigned int pos, viennacl::ocl::handle const & h) - { - CL_TYPE temp = h.get(); - #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) - std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; - #endif - cl_int err = 
clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp); - VIENNACL_ERR_CHECK(err); - } - //local buffer argument: /** @brief Sets an OpenCL local memory object at the provided position */ diff --git a/viennacl/ocl/program.hpp b/viennacl/ocl/program.hpp index aa2c7d8e..79e70c89 100644 --- a/viennacl/ocl/program.hpp +++ b/viennacl/ocl/program.hpp @@ -26,7 +26,6 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/handle.hpp" -#include "viennacl/ocl/kernel.hpp" #include "viennacl/tools/shared_ptr.hpp" namespace viennacl diff --git a/viennacl/traits/handle.hpp b/viennacl/traits/handle.hpp index df1fb9f5..67ed71fc 100644 --- a/viennacl/traits/handle.hpp +++ b/viennacl/traits/handle.hpp @@ -38,14 +38,14 @@ namespace traits // /** @brief Returns the generic memory handle of an object. Non-const version. */ template -viennacl::backend::mem_handle<> & handle(T & obj) +typename T::handle_type & handle(T & obj) { return obj.handle(); } /** @brief Returns the generic memory handle of an object. Const-version. */ template -viennacl::backend::mem_handle<> const & handle(T const & obj) +typename T::handle_type const & handle(T const & obj) { return obj.handle(); } @@ -184,25 +184,37 @@ inline viennacl::tools::shared_ptr const & ram_handle(viennacl::backend::m return h.ram_handle(); } /** \endcond */ - // // OpenCL handle extraction // #ifdef VIENNACL_WITH_OPENCL /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Non-const version. 
*/ -template> -H & opencl_handle(T & obj) +template +viennacl::ocl::pooled_clmem_handle & opencl_handle(viennacl::vector_base & obj) +{ + return viennacl::traits::handle(obj).opencl_handle(); +} + +template +viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_base const & obj) +{ + return viennacl::traits::handle(obj).opencl_handle(); +} + +template +viennacl::ocl::handle & opencl_handle(T & obj) { return viennacl::traits::handle(obj).opencl_handle(); } /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Const version. */ -template -H const & opencl_handle(T const & obj) +template +viennacl::ocl::handle const & opencl_handle(T const & obj) { return viennacl::traits::handle(obj).opencl_handle(); } + inline cl_char opencl_handle(char val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline cl_short opencl_handle(short val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline cl_int opencl_handle(int val) { return val; } //for unification purposes when passing CPU-scalars to kernels @@ -223,7 +235,7 @@ viennacl::ocl::handle const & opencl_handle(viennacl::vector_expression< } template -viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_expression, op_prod> const & obj) +viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_expression, op_prod> const & obj) { return viennacl::traits::handle(obj.rhs()).opencl_handle(); } diff --git a/viennacl/traits/start.hpp b/viennacl/traits/start.hpp index c81a3b35..a01581e2 100644 --- a/viennacl/traits/start.hpp +++ b/viennacl/traits/start.hpp @@ -47,9 +47,9 @@ start(T const & obj) } //ViennaCL vector leads to start index 0: -template -typename result_of::size_type >::type -start(viennacl::vector const &) +template +typename result_of::size_type >::type +start(viennacl::vector const &) { return 0; } diff --git a/viennacl/traits/stride.hpp b/viennacl/traits/stride.hpp 
index 40f1eb52..dcc79268 100644 --- a/viennacl/traits/stride.hpp +++ b/viennacl/traits/stride.hpp @@ -41,8 +41,8 @@ namespace traits // inc: Increment for vectors. Defaults to 1 // template -typename result_of::size_type< viennacl::vector_base >::type -stride(viennacl::vector_base const & s) { return s.stride(); } +typename result_of::size_type< viennacl::vector_base >::type +stride(viennacl::vector_base const & s) { return s.stride(); } // // inc1: Row increment for matrices. Defaults to 1 diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 5df23e68..474ef38c 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -122,7 +122,7 @@ public: * @param start First index of the element in the vector pointed to be the iterator (for vector_range and vector_slice) * @param stride Stride for the support of vector_slice */ - const_vector_iterator(vector_base const & vec, + const_vector_iterator(vector_base const & vec, size_type index, size_type start = 0, size_type stride = 1) : elements_(vec.handle()), index_(index), start_(start), stride_(stride) {} @@ -221,7 +221,7 @@ public: * @param start Offset from the beginning of the underlying vector (for ranges and slices) * @param stride Stride for slices */ - vector_iterator(vector_base & vec, + vector_iterator(vector_base & vec, size_type index, size_type start = 0, size_type stride = 1) : base_type(vec, index, start, stride), elements_(vec.handle()) {} @@ -949,10 +949,10 @@ void vector_base::resize_impl(size_type n template -class vector : public vector_base +class vector : public vector_base { - typedef vector self_type; - typedef vector_base base_type; + typedef vector self_type; + typedef vector_base base_type; public: typedef typename base_type::size_type size_type; @@ -1265,8 +1265,8 @@ void fast_copy(const const_vector_iterator & gpu_begin, * @param gpu_vec A gpu vector. * @param cpu_vec The cpu vector. 
Type requirements: Output iterator pointing to entries linear in memory can be obtained via member function .begin() */ -template -void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec ) +template +void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec ) { viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin()); } @@ -1433,8 +1433,7 @@ void fast_copy(CPU_ITERATOR const & cpu_begin, * @param gpu_vec The gpu vector. */ template -void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec) +void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec) { viennacl::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin()); } -- GitLab From c3f1dd4ea530dcd08527a1baaeb0b3a82f53c5db Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 26 Dec 2018 13:01:44 -0600 Subject: [PATCH 26/46] reverts the exclusion of kernel in program --- viennacl/ocl/program.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/viennacl/ocl/program.hpp b/viennacl/ocl/program.hpp index 79e70c89..aa2c7d8e 100644 --- a/viennacl/ocl/program.hpp +++ b/viennacl/ocl/program.hpp @@ -26,6 +26,7 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/handle.hpp" +#include "viennacl/ocl/kernel.hpp" #include "viennacl/tools/shared_ptr.hpp" namespace viennacl -- GitLab From 1b49ea1ed2ed057aec4abffd430e60ebce767d5b Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 26 Dec 2018 15:58:55 -0600 Subject: [PATCH 27/46] forward declares the new fast_copy --- viennacl/backend/memory.hpp | 2 +- viennacl/forwards.h | 6 +++--- viennacl/vector.hpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp index 318c8efd..9513bbd8 100644 --- a/viennacl/backend/memory.hpp +++ b/viennacl/backend/memory.hpp @@ -293,7 +293,7 @@ namespace backend * @param ptr Location in main RAM where to read data should be written to * @param async Whether the operation should be asynchronous */ - template 
> + template inline void memory_read(mem_handle const & src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, diff --git a/viennacl/forwards.h b/viennacl/forwards.h index fe0eae61..2a0ad856 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -316,9 +316,9 @@ namespace viennacl const_vector_iterator const & gpu_src_end, const_vector_iterator gpu_dest_begin); - template - void fast_copy(const const_vector_iterator & gpu_begin, - const const_vector_iterator & gpu_end, + template + void fast_copy(const const_vector_iterator & gpu_begin, + const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ); template diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 474ef38c..92363cc3 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -1232,7 +1232,7 @@ vector_tuple tie(vector_base & v0, * @param gpu_end GPU iterator pointing to the end of the vector (STL-like) * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector! */ -template +template void fast_copy(const const_vector_iterator & gpu_begin, const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ) @@ -1265,7 +1265,7 @@ void fast_copy(const const_vector_iterator & gpu_begin, * @param gpu_vec A gpu vector. * @param cpu_vec The cpu vector. 
Type requirements: Output iterator pointing to entries linear in memory can be obtained via member function .begin() */ -template +template void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec ) { viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin()); -- GitLab From b6b7902fb4181bae9c657b4f49c4048eff642e83 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 26 Dec 2018 17:01:42 -0600 Subject: [PATCH 28/46] added the extra bits in the memory pool, now need to call those from pooled handle --- viennacl/ocl/mempool/mempool.hpp | 50 ++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 8a82a930..d6d94f0b 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -26,17 +26,17 @@ namespace ocl class memory_pool : mempool::noncopyable { public: - typedef cl_mem pointer_type; typedef size_t size_type; private: typedef uint32_t bin_nr_t; - typedef std::vector bin_t; + typedef std::vector bin_t; typedef std::map container_t; container_t m_container; typedef typename container_t::value_type bin_pair_t; + std::map m_reference_count; // ref counter std::unique_ptr m_allocator; // A held block is one that's been released by the application, but that @@ -145,7 +145,7 @@ namespace ocl { } public: - pointer_type allocate(size_type size) + cl_mem allocate(size_type size) { bin_nr_t bin_nr = bin_number(size); bin_t &bin = get_bin(bin_nr); @@ -197,7 +197,7 @@ namespace ocl throw viennacl::ocl::mem_object_allocation_failure(); } - void free(pointer_type p, size_type size) + void free(cl_mem p, size_type size) { std::cout << "[mempool]: freeing the memory " << @@ -270,23 +270,56 @@ namespace ocl return false; } + void increment_ref_counter(cl_mem p, size_type s) + { + if(m_reference_count.find(p) != m_reference_count.end()) + { + std::cerr << "Did not find a memory to reference count.\n"; + throw 
std::exception(); + } + + ++m_reference_count[p]; + } + + void decrement_ref_counter(cl_mem p, size_type s) + { + if(m_reference_count.find(p) != m_reference_count.end()) + { + std::cerr << "Did not find a memory to reference count.\n"; + throw std::exception(); + } + + --m_reference_count[p]; + + if(m_reference_count[p] == 0) + { + // this is not longer useful => free it + free(p, s); + + // no longer need to store this in the map + m_reference_count.erase(p); + } + } + private: - pointer_type get_from_allocator(size_type alloc_sz) + cl_mem get_from_allocator(size_type alloc_sz) { - pointer_type result = m_allocator->allocate(alloc_sz); + cl_mem result = m_allocator->allocate(alloc_sz); ++m_active_blocks; return result; } - pointer_type pop_block_from_bin(bin_t &bin, size_type size) + cl_mem pop_block_from_bin(bin_t &bin, size_type size) { - pointer_type result = bin.back(); + cl_mem result = bin.back(); bin.pop_back(); dec_held_blocks(); ++m_active_blocks; + m_reference_count[result] = 1; + return result; } }; @@ -294,4 +327,3 @@ namespace ocl } #endif - -- GitLab From 447c14d2b2d2ddb0d2b51329cfb1a19e321120fd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 09:55:44 -0600 Subject: [PATCH 29/46] added the global reference counting --- viennacl/ocl/context.hpp | 27 +++++++++++--------- viennacl/ocl/handle.hpp | 20 +++------------ viennacl/ocl/mempool/mempool.hpp | 44 ++++++++++++-------------------- 3 files changed, 35 insertions(+), 56 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 2f2bf961..a2a6baae 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -320,11 +320,16 @@ public: return mem; } - /// [KK]: TODOTODOTODOTODO + /// [KK]: TODOTODOTODOTODO Documentation + void decrement_mem_ref_counter(cl_mem p, vcl_size_t s) const + { + get_mempool()->decrement_ref_counter(p, s); + } + - void deallocate_memory_in_pool(cl_mem p, size_t size) const + void increment_mem_ref_counter(cl_mem p, 
vcl_size_t s) const { - get_mempool()->free(p, size); + get_mempool()->increment_ref_counter(p, s); } @@ -998,17 +1003,15 @@ cl_mem cl_immediate_allocator::allocate(size_t s) // {{{ pooled handle dec +void pooled_clmem_handle::inc() +{ + p_context_->increment_mem_ref_counter(h_, m_size); +} + + void pooled_clmem_handle::dec() { - std::cout << "[pooled_handle]: Decrementing ref counter of value " << m_ref << std::endl; - if(m_ref == 0) { - std::cerr << "[pooled_handle]: Destroying an already destroyed memory object." << std::endl; - throw std::exception(); - } - --m_ref; - if(m_ref == 0) { - p_context_->deallocate_memory_in_pool(h_, m_size); - } + p_context_->decrement_mem_ref_counter(h_, m_size); } // }}} diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 2dbc8b70..11aa3633 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -234,8 +234,8 @@ namespace viennacl typedef handle super; public: - pooled_clmem_handle() : super(), m_size(0), m_ref(0) {} - pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s, uint32_t _r=1) : super(something, c), m_size(_s), m_ref(_r) + pooled_clmem_handle() : super(), m_size(0) {} + pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s) : super(something, c), m_size(_s) {if(h_!=0) { inc(); @@ -243,7 +243,7 @@ namespace viennacl VIENNACL_ERR_CHECK(err); } } - pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size), m_ref(other.m_ref) + pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size) { if(h_!=0) inc(); @@ -256,7 +256,6 @@ namespace viennacl h_ = other.h_; p_context_ = other.p_context_; m_size = other.m_size; - m_ref = other.m_ref; inc(); return *this; } @@ -283,20 +282,10 @@ namespace viennacl other.m_size = this->m_size; this->m_size = tmp3; - uint32_t tmp4 = other.m_ref; - other.m_ref = this->m_ref; - this->m_ref = tmp4; - return *this; } - 
void inc() - { - cl_int err = clRetainMemObject(h_); - VIENNACL_ERR_CHECK(err); - std::cout << "[pooled_handle]: Incrementing counter." << std::endl; - ++m_ref; - } + virtual void inc(); virtual void dec(); virtual ~pooled_clmem_handle() { if (h_!=0) dec(); @@ -304,7 +293,6 @@ namespace viennacl private: size_t m_size; - uint32_t m_ref; }; diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index d6d94f0b..764e681e 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -156,7 +156,12 @@ namespace ocl std::cout << "[pool] allocation of size " << size << " served from bin " << bin_nr << " which contained " << bin.size() << " entries" << std::endl; - return pop_block_from_bin(bin, size); + + cl_mem result = pop_block_from_bin(bin, size); + assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); + m_reference_count[result] = 1; + + return result; } size_type alloc_sz = alloc_size(bin_nr); @@ -166,34 +171,19 @@ namespace ocl if (m_trace) std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; - try { return get_from_allocator(alloc_sz); } - catch (viennacl::ocl::mem_object_allocation_failure &e) - { - throw; - } - - if (m_trace) - std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + try { + cl_mem result = get_from_allocator(alloc_sz); - // m_allocator->try_release_blocks(); - if (bin.size()) - return pop_block_from_bin(bin, size); + assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); + m_reference_count[result] = 1; - if (m_trace) - std::cout << "[pool] allocation still OOM after GC" << std::endl; - - while (try_to_free_memory()) + return result; + } + catch (viennacl::ocl::mem_object_allocation_failure &e) { - try { return get_from_allocator(alloc_sz); } - catch (viennacl::ocl::mem_object_allocation_failure &e) 
- { - throw; - } + throw; } - std::cerr << ( - "memory_pool::allocate " - "failed to free memory for allocation\n"); throw viennacl::ocl::mem_object_allocation_failure(); } @@ -272,7 +262,7 @@ namespace ocl void increment_ref_counter(cl_mem p, size_type s) { - if(m_reference_count.find(p) != m_reference_count.end()) + if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; throw std::exception(); @@ -283,7 +273,7 @@ namespace ocl void decrement_ref_counter(cl_mem p, size_type s) { - if(m_reference_count.find(p) != m_reference_count.end()) + if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; throw std::exception(); @@ -318,8 +308,6 @@ namespace ocl dec_held_blocks(); ++m_active_blocks; - m_reference_count[result] = 1; - return result; } }; -- GitLab From c4db5de99d6be5736ce8e03a8f1c26de753f6f4a Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 10:14:10 -0600 Subject: [PATCH 30/46] corrects template parameters for fast_copy --- viennacl/forwards.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 2a0ad856..203bc2d9 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -321,10 +321,10 @@ namespace viennacl const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ); - template + template void fast_copy(CPU_ITERATOR const & cpu_begin, CPU_ITERATOR const & cpu_end, - vector_iterator gpu_begin); + vector_iterator gpu_begin); /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. 
*/ -- GitLab From 8269f94ca77f4a33d64e1bc28eef5d20e03d1419 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 10:44:52 -0600 Subject: [PATCH 31/46] Retains the memory after allocating(passes one test :D) --- viennacl/ocl/mempool/mempool.hpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 764e681e..8fef3def 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -159,7 +159,9 @@ namespace ocl cl_mem result = pop_block_from_bin(bin, size); assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); - m_reference_count[result] = 1; + m_reference_count[result] = 0; + cl_int err = clRetainMemObject(result); + VIENNACL_ERR_CHECK(err); return result; } @@ -175,7 +177,10 @@ namespace ocl cl_mem result = get_from_allocator(alloc_sz); assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); - m_reference_count[result] = 1; + + cl_int err = clRetainMemObject(result); + VIENNACL_ERR_CHECK(err); + m_reference_count[result] = 0; return result; } @@ -262,6 +267,7 @@ namespace ocl void increment_ref_counter(cl_mem p, size_type s) { + std::cout << "[mempool]: Incrementing for " << p << std::endl; if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; @@ -273,6 +279,7 @@ namespace ocl void decrement_ref_counter(cl_mem p, size_type s) { + std::cout << "[mempool]: Decrementing for " << p << std::endl; if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; -- GitLab From 73f0564327e6ce89e05e90fc6674e97bae0559ee Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 12:47:17 -0600 Subject: [PATCH 32/46] WIP: added handle as a template parameter --- 
viennacl/forwards.h | 2 +- viennacl/linalg/opencl/vector_operations.hpp | 98 +++++++++++--------- viennacl/vector.hpp | 66 ++++++------- 3 files changed, 86 insertions(+), 80 deletions(-) diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 203bc2d9..90d53816 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -297,7 +297,7 @@ namespace viennacl template> class vector; - template + template> class vector_tuple; //the following forwards are needed for GMRES diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 324a31a7..79129b4d 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -42,6 +42,12 @@ #include "viennacl/traits/handle.hpp" #include "viennacl/traits/stride.hpp" +#ifdef VIENNACL_WITH_OPENCL +#define TEMP_HANDLE viennacl::ocl::pooled_clmem_handle +#else +#define TEMP_HANDLE viennacl::ocl::handle +#endif + namespace viennacl { namespace linalg @@ -52,8 +58,8 @@ namespace opencl // // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! // -template -void convert(vector_base & dest, vector_base const & src) +template +void convert(vector_base & dest, vector_base const & src) { assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -72,9 +78,9 @@ void convert(vector_base & dest, vector_base const & } -template -void av(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) +template +void av(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -112,10 +118,10 @@ void av(vector_base & vec1, } -template -void avbv(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -174,10 +180,10 @@ void avbv(vector_base & vec1, } -template -void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -268,8 +274,8 @@ void vector_assign(vector_base & vec1, const T & alpha, bool up_to_interna * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ -template -void vector_swap(vector_base & vec1, vector_base & vec2) +template +void vector_swap(vector_base & vec1, vector_base & vec2) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -296,9 +302,9 @@ void vector_swap(vector_base & vec1, vector_base & vec2) * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -343,9 +349,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const T, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const T, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -387,9 +393,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -432,9 +438,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_unary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_unary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -471,10 +477,10 @@ void element_op(vector_base & vec1, * @param vec2 The second vector * @param partial_result The results of each group */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, - vector_base & partial_result) +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, + vector_base & partial_result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -520,9 +526,9 @@ void inner_prod_impl(vector_base const & vec1, * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -531,7 +537,7 @@ void inner_prod_impl(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -571,10 +577,10 @@ namespace detail * @param vec_tuple The tuple of vectors y1, y2, ..., yN * @param result The result vector */ -template -void inner_prod_impl(vector_base const & x, - vector_tuple const & vec_tuple, - vector_base & result) +template +void inner_prod_impl(vector_base const & x, + vector_tuple const & vec_tuple, + vector_base & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -762,7 +768,7 @@ void inner_prod_cpu(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -830,7 +836,7 @@ void norm_1_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -859,7 +865,7 @@ void norm_1_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -894,7 +900,7 @@ void norm_2_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -924,7 +930,7 @@ void norm_2_cpu(vector_base const & vec, { std::cout << "norm_2_kernel asked for a vector.\n"; vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group 
results norm_reduction_impl(vec, temp, 2); @@ -959,7 +965,7 @@ void norm_inf_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -988,7 +994,7 @@ void norm_inf_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 92363cc3..f0c4f5a1 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -1071,10 +1071,10 @@ public: }; //vector /** @brief Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from viennacl::tie(). 
*/ -template +template class vector_tuple { - typedef vector_base VectorType; + typedef vector_base VectorType; public: // 2 vectors @@ -1153,65 +1153,65 @@ private: }; // 2 args -template -vector_tuple tie(vector_base const & v0, vector_base const & v1) { return vector_tuple(v0, v1); } +template +vector_tuple tie(vector_base const & v0, vector_base const & v1) { return vector_tuple(v0, v1); } -template -vector_tuple tie(vector_base & v0, vector_base & v1) { return vector_tuple(v0, v1); } +template +vector_tuple tie(vector_base & v0, vector_base & v1) { return vector_tuple(v0, v1); } // 3 args -template -vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2) { return vector_tuple(v0, v1, v2); } +template +vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2) { return vector_tuple(v0, v1, v2); } -template -vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2) { return vector_tuple(v0, v1, v2); } +template +vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2) { return vector_tuple(v0, v1, v2); } // 4 args -template -vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2, vector_base const & v3) +template +vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2, vector_base const & v3) { - return vector_tuple(v0, v1, v2, v3); + return vector_tuple(v0, v1, v2, v3); } -template -vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2, vector_base & v3) +template +vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2, vector_base & v3) { - return vector_tuple(v0, v1, v2, v3); + return vector_tuple(v0, v1, v2, v3); } // 5 args -template -vector_tuple tie(vector_base const & v0, - vector_base const & v1, - vector_base const & v2, - vector_base const & v3, - vector_base const & v4) -{ - typedef vector_base const * VectorPointerType; +template +vector_tuple tie(vector_base const 
& v0, + vector_base const & v1, + vector_base const & v2, + vector_base const & v3, + vector_base const & v4) +{ + typedef vector_base const * VectorPointerType; std::vector vec(5); vec[0] = &v0; vec[1] = &v1; vec[2] = &v2; vec[3] = &v3; vec[4] = &v4; - return vector_tuple(vec); + return vector_tuple(vec); } -template -vector_tuple tie(vector_base & v0, - vector_base & v1, - vector_base & v2, - vector_base & v3, - vector_base & v4) +template +vector_tuple tie(vector_base & v0, + vector_base & v1, + vector_base & v2, + vector_base & v3, + vector_base & v4) { - typedef vector_base * VectorPointerType; + typedef vector_base * VectorPointerType; std::vector vec(5); vec[0] = &v0; vec[1] = &v1; vec[2] = &v2; vec[3] = &v3; vec[4] = &v4; - return vector_tuple(vec); + return vector_tuple(vec); } // TODO: Add more arguments to tie() here. Maybe use some preprocessor magic to accomplish this. -- GitLab From 763d669644861be4c5bddc666212ab38032749b9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 17:18:38 -0600 Subject: [PATCH 33/46] more changes towards adding a template for the vector operations --- viennacl/linalg/opencl/vector_operations.hpp | 116 +++++++++---------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 79129b4d..69810d35 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -610,10 +610,10 @@ void inner_prod_impl(vector_base const & x, case 5: case 4: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); - vector_base const & y3 = vec_tuple.const_at(current_index + 3); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = 
vec_tuple.const_at(current_index + 2); + vector_base const & y3 = vec_tuple.const_at(current_index + 3); viennacl::ocl::enqueue(inner_prod_kernel_4( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -638,9 +638,9 @@ void inner_prod_impl(vector_base const & x, case 3: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = vec_tuple.const_at(current_index + 2); viennacl::ocl::enqueue(inner_prod_kernel_3( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -664,8 +664,8 @@ void inner_prod_impl(vector_base const & x, case 2: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); viennacl::ocl::enqueue(inner_prod_kernel_2( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -688,7 +688,7 @@ void inner_prod_impl(vector_base const & x, case 1: { - vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y0 = vec_tuple.const_at(current_index ); viennacl::ocl::enqueue(inner_prod_kernel_1( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type::type) * 1 * 
inner_prod_kernel_1.local_work_size()), @@ -710,14 +710,14 @@ void inner_prod_impl(vector_base const & x, default: //8 or more vectors { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); - vector_base const & y3 = vec_tuple.const_at(current_index + 3); - vector_base const & y4 = vec_tuple.const_at(current_index + 4); - vector_base const & y5 = vec_tuple.const_at(current_index + 5); - vector_base const & y6 = vec_tuple.const_at(current_index + 6); - vector_base const & y7 = vec_tuple.const_at(current_index + 7); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = vec_tuple.const_at(current_index + 2); + vector_base const & y3 = vec_tuple.const_at(current_index + 3); + vector_base const & y4 = vec_tuple.const_at(current_index + 4); + vector_base const & y5 = vec_tuple.const_at(current_index + 5); + vector_base const & y6 = vec_tuple.const_at(current_index + 6); + vector_base const & y7 = vec_tuple.const_at(current_index + 7); viennacl::ocl::enqueue(inner_prod_kernel_8( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -758,9 +758,9 @@ void inner_prod_impl(vector_base const & x, * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -827,8 +827,8 @@ void norm_reduction_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_1_impl(vector_base const & vec, +template +void norm_1_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -860,8 +860,8 @@ void norm_1_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_1_cpu(vector_base const & vec, +template +void norm_1_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; @@ -891,8 +891,8 @@ void norm_1_cpu(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_2_impl(vector_base const & vec, +template +void norm_2_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -956,8 +956,8 @@ void norm_2_cpu(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_inf_impl(vector_base const & vec, +template +void norm_inf_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -989,8 +989,8 @@ void norm_inf_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_inf_cpu(vector_base const & vec, +template +void norm_inf_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; @@ -1021,8 +1021,8 @@ void norm_inf_cpu(vector_base const & vec, * @param vec The vector * @return The result. Note that the result must be a CPU scalar (unsigned int), since gpu scalars are floating point types. */ -template -cl_uint index_norm_inf(vector_base const & vec) +template +cl_uint index_norm_inf(vector_base const & vec) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); viennacl::linalg::opencl::kernels::vector::init(ctx); @@ -1057,8 +1057,8 @@ cl_uint index_norm_inf(vector_base const & vec) * @param x The vector * @param result The result scalar */ -template -void max_impl(vector_base const & x, +template +void max_impl(vector_base const & x, scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -1095,8 +1095,8 @@ void max_impl(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void max_cpu(vector_base const & x, +template +void max_cpu(vector_base const & x, NumericT & result) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(x).context()); @@ -1136,8 +1136,8 @@ void max_cpu(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void min_impl(vector_base const & x, +template +void min_impl(vector_base const & x, scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -1174,8 +1174,8 @@ void min_impl(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void min_cpu(vector_base const & x, +template +void min_cpu(vector_base const & x, NumericT & result) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(x).context()); @@ -1213,13 +1213,13 @@ void min_cpu(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void sum_impl(vector_base const & x, - scalar & result) +template +void sum_impl(vector_base const & x, + scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); - viennacl::vector all_ones = viennacl::scalar_vector(x.size(), NumericT(1), viennacl::traits::context(x)); + viennacl::vector all_ones = viennacl::scalar_vector(x.size(), NumericT(1), viennacl::traits::context(x)); viennacl::linalg::opencl::inner_prod_impl(x, all_ones, result); } @@ -1247,9 +1247,9 @@ void sum_cpu(vector_base const & x, NumericT & result) * @param alpha The first transformation coefficient * @param beta The second transformation coefficient */ -template -void plane_rotation(vector_base & vec1, - vector_base & vec2, +template +void plane_rotation(vector_base & vec1, + vector_base & vec2, T alpha, T beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -1284,15 +1284,15 @@ namespace detail * Note on performance: For non-in-place scans one could optimize away the temporary 'opencl_carries'-array. * This, however, only provides small savings in the latency-dominated regime, yet would effectively double the amount of code to maintain. 
*/ - template> - void scan_impl(vector_base const & input, - vector_base & output, + template + void scan_impl(vector_base const & input, + vector_base & output, bool is_inclusive) { vcl_size_t local_worksize = 128; vcl_size_t workgroups = 128; - viennacl::backend::mem_handle opencl_carries; + viennacl::backend::mem_handle

opencl_carries; viennacl::backend::memory_create(opencl_carries, sizeof(NumericT)*workgroups, viennacl::traits::context(input)); viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context()); @@ -1329,9 +1329,9 @@ namespace detail * @param input Input vector. * @param output The output vector. Either idential to input or non-overlapping. */ -template -void inclusive_scan(vector_base const & input, - vector_base & output) +template +void inclusive_scan(vector_base const & input, + vector_base & output) { detail::scan_impl(input, output, true); } @@ -1342,9 +1342,9 @@ void inclusive_scan(vector_base const & input, * @param input Input vector * @param output The output vector. Either idential to input or non-overlapping. */ -template -void exclusive_scan(vector_base const & input, - vector_base & output) +template +void exclusive_scan(vector_base const & input, + vector_base & output) { detail::scan_impl(input, output, false); } -- GitLab From e660250d2c4d5ce4e1db62530e60f314cf3b5b90 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 18:34:23 -0600 Subject: [PATCH 34/46] more functon support to handle the changes in handle --- viennacl/linalg/vector_operations.hpp | 166 +++++++++++++------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index 238ecdcc..c8d72ce0 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -47,8 +47,8 @@ namespace viennacl { namespace linalg { - template - void convert(vector_base & dest, vector_base const & src) + template + void convert(vector_base & dest, vector_base const & src) { assert(viennacl::traits::size(dest) == viennacl::traits::size(src) && bool("Incompatible vector sizes in v1 = v2 (convert): size(v1) != size(v2)")); @@ -74,9 +74,9 @@ namespace viennacl } } - template - void av(vector_base & vec1, - vector_base const & vec2, ScalarType1 
const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) + template + void av(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 = v2 @ alpha: size(v1) != size(v2)")); @@ -103,10 +103,10 @@ namespace viennacl } - template - void avbv(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + template + void avbv(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 = v2 @ alpha + v3 @ beta: size(v1) != size(v2)")); assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 = v2 @ alpha + v3 @ beta: size(v2) != size(v3)")); @@ -140,10 +140,10 @@ namespace viennacl } - template - void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + template + void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::size(vec1) == 
viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v1) != size(v2)")); assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v2) != size(v3)")); @@ -214,8 +214,8 @@ namespace viennacl * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ - template - void vector_swap(vector_base & vec1, vector_base & vec2) + template + void vector_swap(vector_base & vec1, vector_base & vec2) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in vector_swap()")); @@ -251,9 +251,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, const vector_base, OP> const & proxy) + template + void element_op(vector_base & vec1, + vector_expression, const vector_base, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ -284,9 +284,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, const T, OP> const & proxy) + template + void element_op(vector_base & vec1, + vector_expression, const T, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ -317,9 +317,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, OP> const & proxy) + template + void element_op(vector_base & vec1, + 
vector_expression, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ -490,9 +490,9 @@ namespace viennacl * @param vec2 The second vector * @param result The result scalar (on the gpu) */ - template - void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result) { assert( vec1.size() == vec2.size() && bool("Size mismatch") ); @@ -520,9 +520,9 @@ namespace viennacl } // vector expression on lhs - template + template void inner_prod_impl(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, scalar & result) { viennacl::vector temp = vec1; @@ -531,8 +531,8 @@ namespace viennacl // vector expression on rhs - template - void inner_prod_impl(vector_base const & vec1, + template + void inner_prod_impl(vector_base const & vec1, viennacl::vector_expression const & vec2, scalar & result) { @@ -562,9 +562,9 @@ namespace viennacl * @param vec2 The second vector * @param result The result scalar (on the gpu) */ - template - void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result) { assert( vec1.size() == vec2.size() && bool("Size mismatch") ); @@ -592,9 +592,9 @@ namespace viennacl } // vector expression on lhs - template + template void inner_prod_cpu(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, T & result) { viennacl::vector temp = vec1; @@ -603,8 +603,8 @@ namespace viennacl // vector expression on rhs - template - void inner_prod_cpu(vector_base const & vec1, + template + void inner_prod_cpu(vector_base const & vec1, viennacl::vector_expression const & vec2, T & result) { @@ -633,10 +633,10 @@ namespace viennacl * @param y_tuple A collection of vector, 
all of the same size. * @param result The result scalar (on the gpu). Needs to match the number of elements in y_tuple */ - template - void inner_prod_impl(vector_base const & x, - vector_tuple const & y_tuple, - vector_base & result) + template + void inner_prod_impl(vector_base const & x, + vector_tuple const & y_tuple, + vector_base & result) { assert( x.size() == y_tuple.const_at(0).size() && bool("Size mismatch") ); assert( result.size() == y_tuple.const_size() && bool("Number of elements does not match result size") ); @@ -669,8 +669,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_1_impl(vector_base const & vec, + template + void norm_1_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -716,8 +716,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_1_cpu(vector_base const & vec, + template + void norm_1_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -763,8 +763,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_2_impl(vector_base const & vec, + template + void norm_2_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -855,8 +855,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_inf_impl(vector_base const & vec, + template + void norm_inf_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -900,8 +900,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_inf_cpu(vector_base const & vec, + template + void norm_inf_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -948,8 +948,8 @@ 
namespace viennacl * @param vec The vector * @return The result. Note that the result must be a CPU scalar */ - template - vcl_size_t index_norm_inf(vector_base const & vec) + template + vcl_size_t index_norm_inf(vector_base const & vec) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -988,8 +988,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void max_impl(vector_base const & vec, viennacl::scalar & result) + template + void max_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1031,8 +1031,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void max_cpu(vector_base const & vec, T & result) + template + void max_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1075,8 +1075,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void min_impl(vector_base const & vec, viennacl::scalar & result) + template + void min_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1118,8 +1118,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void min_cpu(vector_base const & vec, T & result) + template + void min_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1162,8 +1162,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void sum_impl(vector_base const & vec, viennacl::scalar & result) + template + void sum_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1205,8 +1205,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void 
sum_cpu(vector_base const & vec, T & result) + template + void sum_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1255,9 +1255,9 @@ namespace viennacl * @param alpha The first transformation coefficient (CPU scalar) * @param beta The second transformation coefficient (CPU scalar) */ - template - void plane_rotation(vector_base & vec1, - vector_base & vec2, + template + void plane_rotation(vector_base & vec1, + vector_base & vec2, T alpha, T beta) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) @@ -1293,9 +1293,9 @@ namespace viennacl * @param vec1 Input vector. * @param vec2 The output vector. */ - template - void inclusive_scan(vector_base & vec1, - vector_base & vec2) + template + void inclusive_scan(vector_base & vec1, + vector_base & vec2) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -1326,8 +1326,8 @@ namespace viennacl * Given an input element vector (x_0, x_1, ..., x_{n-1}), * this routine overwrites the vector with (x_0, x_0 + x_1, ..., x_0 + x_1 + ... + x_{n-1}) */ - template - void inclusive_scan(vector_base & vec) + template + void inclusive_scan(vector_base & vec) { inclusive_scan(vec, vec); } @@ -1343,9 +1343,9 @@ namespace viennacl * @param vec1 Input vector. * @param vec2 The output vector. */ - template - void exclusive_scan(vector_base & vec1, - vector_base & vec2) + template + void exclusive_scan(vector_base & vec1, + vector_base & vec2) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -1376,15 +1376,15 @@ namespace viennacl * Given an element vector (x_0, x_1, ..., x_{n-1}), * this routine overwrites the input vector with (0, x_0, x_0 + x_1, ..., x_0 + x_1 + ... 
+ x_{n-2}) */ - template - void exclusive_scan(vector_base & vec) + template + void exclusive_scan(vector_base & vec) { exclusive_scan(vec, vec); } } //namespace linalg - template - vector_base & operator += (vector_base & v1, const vector_expression & proxy) + template + vector_base & operator += (vector_base & v1, const vector_expression & proxy) { assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); @@ -1394,8 +1394,8 @@ namespace viennacl return v1; } - template - vector_base & operator -= (vector_base & v1, const vector_expression & proxy) + template + vector_base & operator -= (vector_base & v1, const vector_expression & proxy) { assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); -- GitLab From d0c0176098661534a1867ade6e0cf39ec0b500c8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 18:45:30 -0600 Subject: [PATCH 35/46] adds function support for the host based functions(necessary for compilation) --- .../linalg/host_based/vector_operations.hpp | 116 +++++++++--------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/viennacl/linalg/host_based/vector_operations.hpp b/viennacl/linalg/host_based/vector_operations.hpp index c7fe386e..6c41ba6d 100644 --- a/viennacl/linalg/host_based/vector_operations.hpp +++ b/viennacl/linalg/host_based/vector_operations.hpp @@ -64,8 +64,8 @@ namespace detail // // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! 
// -template -void convert(vector_base & dest, vector_base const & src) +template +void convert(vector_base & dest, vector_base const & src) { DestNumericT * data_dest = detail::extract_raw_pointer(dest); SrcNumericT const * data_src = detail::extract_raw_pointer(src); @@ -84,9 +84,9 @@ void convert(vector_base & dest, vector_base const & data_dest[static_cast(i)*inc_dest+start_dest] = static_cast(data_src[static_cast(i)*inc_src+start_src]); } -template -void av(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) +template +void av(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) { typedef NumericT value_type; @@ -123,10 +123,10 @@ void av(vector_base & vec1, } -template -void avbv(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta) { typedef NumericT value_type; @@ -193,10 +193,10 @@ void avbv(vector_base & vec1, } -template -void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, 
ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) { typedef NumericT value_type; @@ -298,8 +298,8 @@ void vector_assign(vector_base & vec1, const NumericT & alpha, bool * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ -template -void vector_swap(vector_base & vec1, vector_base & vec2) +template +void vector_swap(vector_base & vec1, vector_base & vec2) { typedef NumericT value_type; @@ -332,9 +332,9 @@ void vector_swap(vector_base & vec1, vector_base & vec2) * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -365,9 +365,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding alpha, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const NumericT, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const NumericT, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -394,9 +394,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, alpha and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > 
OpFunctor; @@ -425,9 +425,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_unary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_unary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -553,9 +553,9 @@ VIENNACL_INNER_PROD_IMPL_2(double) * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, ScalarT & result) { typedef NumericT value_type; @@ -574,10 +574,10 @@ void inner_prod_impl(vector_base const & vec1, data_vec2, start2, inc2); //Note: Assignment to result might be expensive, thus a temporary is introduced here } -template -void inner_prod_impl(vector_base const & x, - vector_tuple const & vec_tuple, - vector_base & result) +template +void inner_prod_impl(vector_base const & x, + vector_tuple const & vec_tuple, + vector_base & result) { typedef NumericT value_type; @@ -704,8 +704,8 @@ VIENNACL_NORM_1_IMPL_2(double, double) * @param vec1 The vector * @param result The result scalar */ -template -void norm_1_impl(vector_base const & vec1, +template +void norm_1_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -837,8 +837,8 @@ void norm_2_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void norm_inf_impl(vector_base const & vec1, +template +void norm_inf_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -887,8 +887,8 @@ void norm_inf_impl(vector_base const & vec1, * @param vec1 The vector * @return The result. 
Note that the result must be a CPU scalar (unsigned int), since gpu scalars are floating point types. */ -template -vcl_size_t index_norm_inf(vector_base const & vec1) +template +vcl_size_t index_norm_inf(vector_base const & vec1) { typedef NumericT value_type; @@ -947,8 +947,8 @@ vcl_size_t index_norm_inf(vector_base const & vec1) * @param vec1 The vector * @param result The result scalar */ -template -void max_impl(vector_base const & vec1, +template +void max_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -996,8 +996,8 @@ void max_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void min_impl(vector_base const & vec1, +template +void min_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -1045,8 +1045,8 @@ void min_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void sum_impl(vector_base const & vec1, +template +void sum_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -1076,9 +1076,9 @@ void sum_impl(vector_base const & vec1, * @param alpha The first transformation coefficient * @param beta The second transformation coefficient */ -template -void plane_rotation(vector_base & vec1, - vector_base & vec2, +template +void plane_rotation(vector_base & vec1, + vector_base & vec2, NumericT alpha, NumericT beta) { typedef NumericT value_type; @@ -1112,9 +1112,9 @@ void plane_rotation(vector_base & vec1, namespace detail { /** @brief Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend. 
*/ - template - void vector_scan_impl(vector_base const & vec1, - vector_base & vec2, + template + void vector_scan_impl(vector_base const & vec1, + vector_base & vec2, bool is_inclusive) { NumericT const * data_vec1 = detail::extract_raw_pointer(vec1); @@ -1217,9 +1217,9 @@ namespace detail * @param vec1 Input vector: Gets overwritten by the routine. * @param vec2 The output vector. Either idential to vec1 or non-overlapping. */ -template -void inclusive_scan(vector_base const & vec1, - vector_base & vec2) +template +void inclusive_scan(vector_base const & vec1, + vector_base & vec2) { detail::vector_scan_impl(vec1, vec2, true); } @@ -1232,9 +1232,9 @@ void inclusive_scan(vector_base const & vec1, * @param vec1 Input vector: Gets overwritten by the routine. * @param vec2 The output vector. Either idential to vec1 or non-overlapping. */ -template -void exclusive_scan(vector_base const & vec1, - vector_base & vec2) +template +void exclusive_scan(vector_base const & vec1, + vector_base & vec2) { detail::vector_scan_impl(vec1, vec2, false); } -- GitLab From 690a079270ba7e190b912c805631bd402ee900d2 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 18:53:15 -0600 Subject: [PATCH 36/46] removes debugging distruptions. --- viennacl/backend/mem_handle.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 29a09d8c..b5335123 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -24,7 +24,6 @@ #include #include -#include #include "viennacl/forwards.h" #include "viennacl/tools/shared_ptr.hpp" #include "viennacl/backend/cpu_ram.hpp" @@ -234,9 +233,6 @@ public: /** @brief Sets the size of the currently active buffer. Use with care! 
*/ void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; } - ~mem_handle() - {} - private: memory_types active_handle_; ram_handle_type ram_handle_; -- GitLab From 4d32d3dcfc9c88d36bf4b3ed5e9acd1327d588cd Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Thu, 27 Dec 2018 19:08:13 -0600 Subject: [PATCH 37/46] formatting to decrease the diff --- viennacl/backend/mem_handle.hpp | 8 ++++---- viennacl/ocl/context.hpp | 4 +--- viennacl/ocl/forwards.h | 7 +++---- viennacl/ocl/handle.hpp | 12 ++++-------- viennacl/traits/handle.hpp | 2 +- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index b5335123..ed8aa8ab 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -86,7 +86,7 @@ inline memory_types default_memory_type(memory_types new_memory_type) { return d * Instead, this class collects all the necessary conditional compilations. * */ -template +template class mem_handle { public: @@ -103,9 +103,9 @@ public: #ifdef VIENNACL_WITH_OPENCL /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ - OCL_Handle & opencl_handle() { return opencl_handle_; } + OCLHandle & opencl_handle() { return opencl_handle_; } /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. 
*/ - OCL_Handle const & opencl_handle() const { return opencl_handle_; } + OCLHandle const & opencl_handle() const { return opencl_handle_; } #endif #ifdef VIENNACL_WITH_CUDA @@ -237,7 +237,7 @@ private: memory_types active_handle_; ram_handle_type ram_handle_; #ifdef VIENNACL_WITH_OPENCL - OCL_Handle opencl_handle_; + OCLHandle opencl_handle_; #endif #ifdef VIENNACL_WITH_CUDA cuda_handle_type cuda_handle_; diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index a2a6baae..bc8a2e9b 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -8,7 +8,6 @@ Portions of this software are copyright by UChicago Argonne, LLC. ----------------- - ViennaCL - The Vienna Computing Library ----------------- @@ -43,8 +42,8 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" -#include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/handle.hpp" +#include "viennacl/ocl/kernel.hpp" #include "viennacl/ocl/program.hpp" #include "viennacl/ocl/device.hpp" #include "viennacl/ocl/platform.hpp" @@ -366,7 +365,6 @@ public: viennacl::ocl::handle queue_handle(q, *this); queues_[dev].push_back(viennacl::ocl::command_queue(queue_handle)); queues_[dev].back().handle().inc(); - } /** @brief Adds a queue for the given device to the context */ diff --git a/viennacl/ocl/forwards.h b/viennacl/ocl/forwards.h index 091feeb2..5c7d0699 100644 --- a/viennacl/ocl/forwards.h +++ b/viennacl/ocl/forwards.h @@ -25,7 +25,6 @@ #define VIENNACL_OCL_MAX_DEVICE_NUM 8 #include -#include namespace viennacl { @@ -41,15 +40,15 @@ namespace viennacl /** @brief A tag denoting the default OpenCL device type (SDK-specific) */ struct default_tag {}; - template - class handle; - class kernel; class device; class command_queue; class context; class program; + template + class handle; + template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 11aa3633..9fe0db0d 100644 --- 
a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -46,7 +46,6 @@ namespace viennacl class handle_inc_dec_helper { typedef typename OCL_TYPE::ERROR_TEMPLATE_ARGUMENT_FOR_CLASS_INVALID ErrorType; - }; /** \cond */ @@ -154,10 +153,8 @@ namespace viennacl { public: handle() : h_(0), p_context_(NULL) {} - handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) - {} - handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { - if (h_ != 0) inc(); } + handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) {} + handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } virtual ~handle() { if (h_ != 0) dec(); } /** @brief Copies the OpenCL handle from the provided handle. Does not take ownership like e.g. std::auto_ptr<>, so both handle objects are valid (more like shared_ptr). */ @@ -188,6 +185,7 @@ namespace viennacl return *this; } + /** @brief Implicit conversion to the plain OpenCL handle. DEPRECATED and will be removed some time in the future. */ operator OCL_TYPE() const { return h_; } @@ -217,9 +215,7 @@ namespace viennacl /** @brief Manually increment the OpenCL reference count. Typically called automatically, but is necessary if user-supplied memory objects are wrapped. */ virtual void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. 
*/ - virtual void dec() { - handle_inc_dec_helper::dec(h_); - } + virtual void dec() { handle_inc_dec_helper::dec(h_); } protected: OCL_TYPE h_; viennacl::ocl::context const * p_context_; diff --git a/viennacl/traits/handle.hpp b/viennacl/traits/handle.hpp index 67ed71fc..e1d69f3d 100644 --- a/viennacl/traits/handle.hpp +++ b/viennacl/traits/handle.hpp @@ -183,6 +183,7 @@ inline viennacl::tools::shared_ptr const & ram_handle(viennacl::backend::m { return h.ram_handle(); } + /** \endcond */ // // OpenCL handle extraction @@ -214,7 +215,6 @@ viennacl::ocl::handle const & opencl_handle(T const & obj) return viennacl::traits::handle(obj).opencl_handle(); } - inline cl_char opencl_handle(char val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline cl_short opencl_handle(short val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline cl_int opencl_handle(int val) { return val; } //for unification purposes when passing CPU-scalars to kernels -- GitLab From 8c4f8ae5687fe3876637e47cd7ca6beebbc6c604 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 08:29:43 -0600 Subject: [PATCH 38/46] WIP: adds handle parameter to matrix_base, matrix class --- viennacl/detail/matrix_def.hpp | 14 +- viennacl/forwards.h | 4 +- viennacl/matrix.hpp | 642 ++++++++++++++++----------------- 3 files changed, 330 insertions(+), 330 deletions(-) diff --git a/viennacl/detail/matrix_def.hpp b/viennacl/detail/matrix_def.hpp index c13ef01f..2b594670 100644 --- a/viennacl/detail/matrix_def.hpp +++ b/viennacl/detail/matrix_def.hpp @@ -99,10 +99,10 @@ public: scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx = viennacl::context()) : implicit_matrix_base(s1, s2, val, false, ctx) {} }; -template +template class matrix_base { - typedef matrix_base self_type; + typedef matrix_base self_type; public: typedef matrix_iterator iterator1; @@ -111,7 +111,7 @@ public: typedef NumericT 
cpu_value_type; typedef SizeT size_type; typedef DistanceT difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; /** @brief The default constructor. Does not allocate any memory. */ explicit matrix_base(): size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0), row_major_fixed_(false), row_major_(true) {} @@ -129,7 +129,7 @@ public: explicit matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx = viennacl::context()); /** @brief Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_stride */ - explicit matrix_base(viennacl::backend::mem_handle & h, + explicit matrix_base(handle_type & h, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major): size1_(mat_size1), size2_(mat_size2), @@ -161,8 +161,8 @@ public: matrix_base(const self_type & other); /* Conversion CTOR */ - template - matrix_base(const matrix_base & other); + template + matrix_base(const matrix_base & other); self_type & operator=(const self_type & other); template @@ -249,7 +249,7 @@ public: void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(elements_, new_ctx); } protected: - void set_handle(viennacl::backend::mem_handle const & h); + void set_handle(handle_type const & h); void resize(size_type rows, size_type columns, bool preserve = true); private: size_type size1_; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 90d53816..5877da4c 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -401,10 +401,10 @@ namespace viennacl template class matrix_iterator; - template + template, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> class matrix_base; - template + 
template, typename F = row_major, unsigned int ALIGNMENT = 1> class matrix; template diff --git a/viennacl/matrix.hpp b/viennacl/matrix.hpp index 14c980c3..c83b6ffd 100644 --- a/viennacl/matrix.hpp +++ b/viennacl/matrix.hpp @@ -123,8 +123,8 @@ private: * @param columns Number of columns * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ -template -matrix_base::matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) +template +matrix_base::matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(rows, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(columns, dense_padding_size)), @@ -139,9 +139,9 @@ matrix_base::matrix_base(size_type rows, size_type c /** @brief Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_stride */ -template +template template -matrix_base::matrix_base(matrix_expression const & proxy) : +matrix_base::matrix_base(matrix_expression const & proxy) : size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -157,8 +157,8 @@ matrix_base::matrix_base(matrix_expression -matrix_base::matrix_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, +template +matrix_base::matrix_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major) @@ -189,8 +189,8 @@ 
matrix_base::matrix_base(NumericT * ptr_to_mem, vien } #ifdef VIENNACL_WITH_OPENCL -template -matrix_base::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) +template +matrix_base::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), @@ -204,8 +204,8 @@ matrix_base::matrix_base(cl_mem mem, size_type rows, elements_.raw_size(sizeof(NumericT)*internal_size()); } -template -matrix_base::matrix_base(cl_mem mem, viennacl::context ctx, +template +matrix_base::matrix_base(cl_mem mem, viennacl::context ctx, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major) @@ -224,8 +224,8 @@ matrix_base::matrix_base(cl_mem mem, viennacl::conte #endif // Copy CTOR -template -matrix_base::matrix_base(const matrix_base & other) : +template +matrix_base::matrix_base(const matrix_base & other) : size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -243,7 +243,7 @@ matrix_base::matrix_base(const matrix_base template -matrix_base::matrix_base(const matrix_base & other) : +matrix_base::matrix_base(const matrix_base & other) : size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -258,8 +258,8 @@ matrix_base::matrix_base(const matrix_base -matrix_base & matrix_base::operator=(const self_type & other) //enables implicit conversions +template +matrix_base 
& matrix_base::operator=(const self_type & other) //enables implicit conversions { if (&other==this) return *this; @@ -279,9 +279,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator=(const matrix_base & other) +matrix_base & matrix_base::operator=(const matrix_base & other) { if (internal_size() == 0) { @@ -300,9 +300,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator=(const matrix_expression & proxy) +matrix_base & matrix_base::operator=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0) && (viennacl::traits::size2(proxy) == size2() || size2() == 0) @@ -328,8 +328,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator=(const matrix_expression & proxy) +template +matrix_base & matrix_base::operator=(const matrix_expression & proxy) { if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 ) { @@ -358,9 +358,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator+=(const matrix_expression & proxy) +matrix_base & matrix_base::operator+=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) @@ -373,9 +373,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator-=(const matrix_expression & proxy) +matrix_base & matrix_base::operator-=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) @@ -389,8 +389,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator = (identity_matrix const & m) +template +matrix_base & matrix_base::operator = (identity_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -417,8 +417,8 @@ matrix_base & matrix_base 
-matrix_base & matrix_base::operator = (zero_matrix const & m) +template +matrix_base & matrix_base::operator = (zero_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -442,8 +442,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator = (scalar_matrix const & m) +template +matrix_base & matrix_base::operator = (scalar_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -473,8 +473,8 @@ matrix_base & matrix_base -entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) +template +entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) { if (row_major_) return entry_proxy(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); @@ -483,8 +483,8 @@ entry_proxy matrix_base::operator()(size_t /** @brief Read access to a single element of the matrix/matrix_range/matrix_slice */ -template -const_entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) const +template +const_entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) const { if (row_major_) return const_entry_proxy(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); @@ -494,8 +494,8 @@ const_entry_proxy matrix_base::operator()( // // Operator overloads for enabling implicit conversions: // -template -matrix_base & matrix_base::operator += (const matrix_base & other) +template +matrix_base & matrix_base::operator += (const matrix_base & other) { viennacl::linalg::ambm(*this, *this, NumericT(1.0), 1, false, false, @@ -503,8 +503,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator -= (const matrix_base & 
other) +template +matrix_base & matrix_base::operator -= (const matrix_base & other) { viennacl::linalg::ambm(*this, *this, NumericT(1.0), 1, false, false, @@ -513,8 +513,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (char val) +template +matrix_base & matrix_base::operator *= (char val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -522,8 +522,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (short val) +template +matrix_base & matrix_base::operator *= (short val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -531,8 +531,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (int val) +template +matrix_base & matrix_base::operator *= (int val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -540,8 +540,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (long val) +template +matrix_base & matrix_base::operator *= (long val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -549,8 +549,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (float val) +template +matrix_base & matrix_base::operator *= (float val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -558,8 +558,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (double val) +template +matrix_base & matrix_base::operator *= (double val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -569,8 +569,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (char val) +template +matrix_base & matrix_base::operator /= (char val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -578,8 +578,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (short val) +template +matrix_base & matrix_base::operator /= (short val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -587,8 
+587,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (int val) +template +matrix_base & matrix_base::operator /= (int val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -596,8 +596,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (long val) +template +matrix_base & matrix_base::operator /= (long val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -605,8 +605,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (float val) +template +matrix_base & matrix_base::operator /= (float val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -614,8 +614,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (double val) +template +matrix_base & matrix_base::operator /= (double val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -624,18 +624,18 @@ matrix_base & matrix_base -matrix_expression, const NumericT, op_mult> matrix_base::operator-() const +template +matrix_expression, const NumericT, op_mult> matrix_base::operator-() const { return matrix_expression(*this, NumericT(-1)); } -template -void matrix_base::clear() { viennacl::linalg::matrix_assign(*this, NumericT(0), true); } +template +void matrix_base::clear() { viennacl::linalg::matrix_assign(*this, NumericT(0), true); } -template -void matrix_base::resize(size_type rows, size_type columns, bool preserve) +template +void matrix_base::resize(size_type rows, size_type columns, bool preserve) { assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!")); @@ -692,11 +692,11 @@ void matrix_base::resize(size_type rows, size_type c * @tparam F Storage layout: Either row_major or column_major * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. 
Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ -template -class matrix : public matrix_base +template +class matrix : public matrix_base { - typedef matrix self_type; - typedef matrix_base base_type; + typedef matrix self_type; + typedef matrix_base base_type; public: typedef typename base_type::size_type size_type; @@ -824,10 +824,10 @@ public: * @param s STL output stream * @param gpu_matrix A dense ViennaCL matrix */ -template -std::ostream & operator<<(std::ostream & s, const matrix_base & gpu_matrix) +template +std::ostream & operator<<(std::ostream & s, const matrix_base & gpu_matrix) { - typedef typename matrix_base::size_type size_type; + typedef typename matrix_base::size_type size_type; std::vector tmp(gpu_matrix.internal_size()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0])); @@ -872,11 +872,11 @@ std::ostream & operator<<(std::ostream & s, const matrix_expression -matrix_expression< const matrix_base, const matrix_base, op_trans> +template +matrix_expression< const matrix_base, const matrix_base, op_trans> trans(const matrix_base & mat) { - return matrix_expression< const matrix_base, const matrix_base, op_trans>(mat, mat); + return matrix_expression< const matrix_base, const matrix_base, op_trans>(mat, mat); } /** @brief Returns an expression template class representing the transposed matrix expression */ @@ -890,34 +890,34 @@ trans(const matrix_expression & proxy) } //diag(): -template -vector_expression< const matrix_base, const int, op_matrix_diag> +template +vector_expression< const matrix_base, const int, op_matrix_diag> diag(const matrix_base & A, int k = 0) { - return vector_expression< const matrix_base, const int, op_matrix_diag>(A, k); + return vector_expression< const matrix_base, const int, op_matrix_diag>(A, k); } -template -matrix_expression< const vector_base, const int, op_vector_diag> +template +matrix_expression< const vector_base, 
const int, op_vector_diag> diag(const vector_base & v, int k = 0) { - return matrix_expression< const vector_base, const int, op_vector_diag>(v, k); + return matrix_expression< const vector_base, const int, op_vector_diag>(v, k); } // row(): -template -vector_expression< const matrix_base, const unsigned int, op_row> -row(const matrix_base & A, unsigned int i) +template +vector_expression< const matrix_base, const unsigned int, op_row> +row(const matrix_base & A, unsigned int i) { - return vector_expression< const matrix_base, const unsigned int, op_row>(A, i); + return vector_expression< const matrix_base, const unsigned int, op_row>(A, i); } // column(): -template -vector_expression< const matrix_base, const unsigned int, op_column> -column(const matrix_base & A, unsigned int j) +template +vector_expression< const matrix_base, const unsigned int, op_column> +column(const matrix_base & A, unsigned int j) { - return vector_expression< const matrix_base, const unsigned int, op_column>(A, j); + return vector_expression< const matrix_base, const unsigned int, op_column>(A, j); } /////////////////////// transfer operations: ////////////////////////////////////// @@ -930,11 +930,11 @@ column(const matrix_base & A, unsigned int j) * @param cpu_matrix A dense matrix on the host. Type requirements: .size1() returns number of rows, .size2() returns number of columns. Access to entries via operator() * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(const CPUMatrixT & cpu_matrix, - matrix & gpu_matrix ) + matrix & gpu_matrix ) { - typedef typename matrix::size_type size_type; + typedef typename matrix::size_type size_type; //std::cout << "Copying CPUMatrixT!" << std::endl; //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; @@ -966,11 +966,11 @@ void copy(const CPUMatrixT & cpu_matrix, * @param cpu_matrix A dense matrix on the host of type std::vector< std::vector<> >. 
cpu_matrix[i][j] returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(const std::vector< std::vector, A2> & cpu_matrix, - matrix & gpu_matrix ) + matrix & gpu_matrix ) { - typedef typename matrix::size_type size_type; + typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { @@ -1006,10 +1006,10 @@ void copy(const std::vector< std::vector, A2> & cpu_matrix, * @param cpu_matrix_end Pointer past the last matrix entry. Cf. iterator concept in STL * @param gpu_matrix A dense ViennaCL matrix */ -template +template void fast_copy(NumericT * cpu_matrix_begin, NumericT * cpu_matrix_end, - matrix & gpu_matrix) + matrix & gpu_matrix) { if (gpu_matrix.internal_size() == 0) viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin); @@ -1026,11 +1026,11 @@ void fast_copy(NumericT * cpu_matrix_begin, * @param arma_matrix A dense MTL matrix. cpu_matrix(i, j) returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(arma::Mat const & arma_matrix, - viennacl::matrix & vcl_matrix) + viennacl::matrix & vcl_matrix) { - typedef typename viennacl::matrix::size_type size_type; + typedef typename viennacl::matrix::size_type size_type; if (vcl_matrix.size1() == 0 || vcl_matrix.size2() == 0) { @@ -1195,8 +1195,8 @@ void copy(const matrix & gpu_matrix, * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix A dense memory on the host using STL types, typically std::vector< std::vector<> > Must have at least as many rows and columns as the gpu_matrix! 
Type requirement: Access to entries via operator() */ -template -void copy(const matrix & gpu_matrix, +template +void copy(const matrix & gpu_matrix, std::vector< std::vector, A2> & cpu_matrix) { typedef typename matrix::size_type size_type; @@ -1227,8 +1227,8 @@ void copy(const matrix & gpu_matrix, * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix_begin Pointer to the output memory on the CPU. User must ensure that provided memory is large enough. */ -template -void fast_copy(const matrix & gpu_matrix, +template +void fast_copy(const matrix & gpu_matrix, NumericT * cpu_matrix_begin) { viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin); @@ -1258,12 +1258,12 @@ operator + (matrix_expression const & proxy1, } template + typename NumericT, tyepname H> matrix_expression< const matrix_expression, -const matrix_base, +const matrix_base, op_add> operator + (matrix_expression const & proxy1, - matrix_base const & proxy2) + matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) @@ -1273,12 +1273,12 @@ operator + (matrix_expression const & proxy1, op_add>(proxy1, proxy2); } -template -matrix_expression< const matrix_base, +matrix_expression< const matrix_base, const matrix_expression, op_add> -operator + (matrix_base const & proxy1, +operator + (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) @@ -1318,44 +1318,44 @@ operator - (matrix_expression const & proxy1, } template + typename NumericT, typename H> matrix_expression< const matrix_expression, -const matrix_base, +const matrix_base, op_sub> operator - (matrix_expression const & proxy1, - matrix_base const & proxy2) + matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && 
(viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, - const matrix_base, + const matrix_base, op_sub>(proxy1, proxy2); } -template -matrix_expression< const matrix_base, +matrix_expression< const matrix_base, const matrix_expression, op_sub> -operator - (matrix_base const & proxy1, +operator - (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); - return matrix_expression< const matrix_base, + return matrix_expression< const matrix_base, const matrix_expression, op_sub>(proxy1, proxy2); } /** @brief Operator overload for m1 - m2, where m1 and m2 are either dense matrices, matrix ranges, or matrix slices. No mixing of different storage layouts allowed at the moment. */ -template -matrix_expression< const matrix_base, const matrix_base, op_sub > -operator - (const matrix_base & m1, const matrix_base & m2) +template +matrix_expression< const matrix_base, const matrix_base, op_sub > +operator - (const matrix_base & m1, const matrix_base & m2) { - return matrix_expression< const matrix_base, - const matrix_base, + return matrix_expression< const matrix_base, + const matrix_base, op_sub > (m1, m2); } @@ -1377,51 +1377,51 @@ operator * (S1 const & value, matrix_base const & m1) } /** @brief Operator overload for the expression alpha * m1, where alpha is a char (8-bit integer) */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (char value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (char value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const 
NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a short integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (short value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (short value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is an integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (int value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (int value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a long integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (long value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (long value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a single precision floating point value */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (float value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (float value, matrix_base const & m1) 
{ - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a double precision floating point value */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (double value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (double value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } @@ -1457,69 +1457,69 @@ operator * (S1 const & val, /** @brief Scales the matrix by a GPU scalar 'alpha' and returns an expression template */ -template +template typename viennacl::enable_if< viennacl::is_any_scalar::value, -matrix_expression< const matrix_base, const S1, op_mult> >::type -operator * (matrix_base const & m1, S1 const & s1) +matrix_expression< const matrix_base, const S1, op_mult> >::type +operator * (matrix_base const & m1, S1 const & s1) { - return matrix_expression< const matrix_base, const S1, op_mult>(m1, s1); + return matrix_expression< const matrix_base, const S1, op_mult>(m1, s1); } /** @brief Scales the matrix by a char (8-bit integer) 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, char s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, char s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a short integer 'alpha' and returns an expression template. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, short s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, short s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by an integer 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, int s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, int s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a long integer 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, long s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, long s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a single precision floating point number 'alpha' and returns an expression template. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, float s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, float s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a double precision floating point number 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, double s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, double s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } // operator *= /** @brief Scales a matrix by a GPU scalar value */ -template -typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type -operator *= (matrix_base & m1, S1 const & gpu_val) +template +typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type +operator *= (matrix_base & m1, S1 const & gpu_val) { bool is_sign_flip = viennacl::is_flip_sign_scalar::value; viennacl::linalg::am(m1, @@ -1528,9 +1528,9 @@ operator *= (matrix_base & m1, S1 const & gpu_val) } /** @brief Scales a matrix by a char (8-bit) value. */ -template -matrix_base & -operator *= (matrix_base & m1, char gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, char gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1538,9 +1538,9 @@ operator *= (matrix_base & m1, char gpu_val) } /** @brief Scales a matrix by a short integer value. 
*/ -template -matrix_base & -operator *= (matrix_base & m1, short gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, short gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1548,9 +1548,9 @@ operator *= (matrix_base & m1, short gpu_val) } /** @brief Scales a matrix by an integer value. */ -template -matrix_base & -operator *= (matrix_base & m1, int gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, int gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1558,9 +1558,9 @@ operator *= (matrix_base & m1, int gpu_val) } /** @brief Scales a matrix by a long integer value. */ -template -matrix_base & -operator *= (matrix_base & m1, long gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, long gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1568,9 +1568,9 @@ operator *= (matrix_base & m1, long gpu_val) } /** @brief Scales a matrix by a single precision floating point value. */ -template -matrix_base & -operator *= (matrix_base & m1, float gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, float gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1578,9 +1578,9 @@ operator *= (matrix_base & m1, float gpu_val) } /** @brief Scales a matrix by a double precision floating point value. 
*/ -template -matrix_base & -operator *= (matrix_base & m1, double gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, double gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1608,33 +1608,33 @@ operator / (matrix_expression const & proxy, /** @brief Returns an expression template for scaling the matrix by a GPU scalar 'alpha' */ -template +template typename viennacl::enable_if< viennacl::is_any_scalar::value, -matrix_expression< const matrix_base, const S1, op_div> >::type -operator / (matrix_base const & m1, S1 const & s1) +matrix_expression< const matrix_base, const S1, op_div> >::type +operator / (matrix_base const & m1, S1 const & s1) { - return matrix_expression< const matrix_base, const S1, op_div>(m1, s1); + return matrix_expression< const matrix_base, const S1, op_div>(m1, s1); } /** @brief Returns an expression template for scaling the matrix by a char (8-bit integer) 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, char s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, char s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a short integer 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, short s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, short s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by an integer 'alpha'. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_div> +template +matrix_expression< const matrix_base, const NumericT, op_div> operator / (matrix_base const & m1, int s1) { return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); @@ -1642,26 +1642,26 @@ operator / (matrix_base const & m1, int s1) /** @brief Returns an expression template for scaling the matrix by a long integer 'alpha'. */ template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, long s1) +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, long s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a single precision floating point number 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, float s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, float s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a double precision floating point number 'alpha'. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, double s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, double s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } @@ -1669,9 +1669,9 @@ operator / (matrix_base const & m1, double s1) // operator /= /** @brief Scales a matrix by a GPU scalar value */ -template -typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type -operator /= (matrix_base & m1, S1 const & gpu_val) +template +typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type +operator /= (matrix_base & m1, S1 const & gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1679,9 +1679,9 @@ operator /= (matrix_base & m1, S1 const & gpu_val) } /** @brief Scales a matrix by a char (8-bit integer) value */ -template -matrix_base & -operator /= (matrix_base & m1, char gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, char gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, true, false); @@ -1689,9 +1689,9 @@ operator /= (matrix_base & m1, char gpu_val) } /** @brief Scales a matrix by a short integer value */ -template -matrix_base & -operator /= (matrix_base & m1, short gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, short gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1699,9 +1699,9 @@ operator /= (matrix_base & m1, short gpu_val) } /** @brief Scales a matrix by an integer value */ -template -matrix_base & -operator /= (matrix_base & m1, int gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, int gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1709,9 +1709,9 @@ operator /= (matrix_base & m1, int gpu_val) } /** @brief Scales a matrix by a 
long integer value */ -template -matrix_base & -operator /= (matrix_base & m1, long gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, long gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1719,9 +1719,9 @@ operator /= (matrix_base & m1, long gpu_val) } /** @brief Scales a matrix by a single precision floating point value */ -template -matrix_base & -operator /= (matrix_base & m1, float gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, float gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1729,9 +1729,9 @@ operator /= (matrix_base & m1, float gpu_val) } /** @brief Scales a matrix by a double precision floating point value */ -template -matrix_base & -operator /= (matrix_base & m1, double gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, double gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1743,59 +1743,59 @@ operator /= (matrix_base & m1, double gpu_val) // outer_prod(v1, v2) * val; -template +template typename viennacl::enable_if< viennacl::is_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type -operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, +operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } -template +template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const 
vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type -operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, +operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } // val * outer_prod(v1, v2); -template +template typename viennacl::enable_if< viennacl::is_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type operator*(const S1 & val, - const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) + const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } -template +template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type operator*(const S1 & val, - const viennacl::matrix_expression< const vector_base, const vector_base, 
op_prod> & proxy) + const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } @@ -1814,18 +1814,18 @@ namespace detail { // x = y - template - struct op_executor, op_assign, matrix_base > + template + struct op_executor, op_assign, matrix_base > { - static void apply(matrix_base & lhs, matrix_base const & rhs) + static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::am(lhs, rhs, T(1), 1, false, false); } }; // x = trans(y) - template - struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans> > + template + struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) { @@ -1850,31 +1850,31 @@ namespace detail // x += y - template - struct op_executor, op_inplace_add, matrix_base > + template + struct op_executor, op_inplace_add, matrix_base > { - static void apply(matrix_base & lhs, matrix_base const & rhs) + static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false); } }; // x += trans(y) - template - struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans> > + template + struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) + static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) { - matrix_base temp(rhs); + matrix_base temp(rhs); viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, 
temp, T(1), 1, false, false); } }; // x += trans(expr) - template - struct op_executor, op_inplace_add, matrix_expression, const matrix_expression, op_trans> > + template + struct op_executor, op_inplace_add, matrix_expression, const matrix_expression, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, + static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_trans> const & rhs) { @@ -1906,10 +1906,10 @@ namespace detail }; // x -= trans(expr) - template - struct op_executor, op_inplace_sub, matrix_expression, const matrix_expression, op_trans> > + template + struct op_executor, op_inplace_sub, matrix_expression, const matrix_expression, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, + static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_trans> const & rhs) { @@ -1923,30 +1923,30 @@ namespace detail // x = alpha * y - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x += alpha * y - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x -= alpha * y - template - struct op_executor, op_inplace_sub, 
matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true); } @@ -1956,10 +1956,10 @@ namespace detail ///////////// x OP vec_expr * alpha //////////////////////// // x = alpha * vec_expr - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { if (lhs.row_major()) { @@ -1968,7 +1968,7 @@ namespace detail } else { - matrix temp(proxy.lhs()); + matrix, column_major> temp(proxy.lhs()); lhs = temp * proxy.rhs(); } } @@ -1994,10 +1994,10 @@ namespace detail }; // x -= alpha * vec_expr - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { if (lhs.row_major()) { @@ -2006,7 +2006,7 @@ namespace detail } else { - matrix temp(proxy.lhs()); + matrix, column_major> temp(proxy.lhs()); lhs -= temp * proxy.rhs(); } } @@ -2016,30 +2016,30 @@ namespace detail ///////////// x OP y / alpha //////////////////////// // x = y / alpha - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > + template + struct 
op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x += y / alpha - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x -= y / alpha - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true); } @@ -2049,10 +2049,10 @@ namespace detail ///////////// x OP vec_expr / alpha //////////////////////// // x = vec_expr / alpha - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2061,17 +2061,17 @@ namespace detail } else { - matrix temp(proxy.lhs()); + matrix, 
column_major> temp(proxy.lhs()); lhs = temp / proxy.rhs(); } } }; // x += vec_expr / alpha - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2080,17 +2080,17 @@ namespace detail } else { - matrix temp(proxy.lhs()); + matrix, column_major> temp(proxy.lhs()); lhs += temp / proxy.rhs(); } } }; // x -= vec_expr / alpha - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2099,7 +2099,7 @@ namespace detail } else { - matrix temp(proxy.lhs()); + matrix, column_major> temp(proxy.lhs()); lhs -= temp / proxy.rhs(); } } @@ -2108,12 +2108,12 @@ namespace detail // generic x = vec_expr1 + vec_expr2: - template - struct op_executor, op_assign, matrix_expression > + template + struct op_executor, op_assign, matrix_expression > { // generic x = vec_expr1 + vec_expr2: template - static void apply(matrix_base & lhs, matrix_expression const & proxy) + static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); -- GitLab From 6ec892852b43a6e870492617f2ad5cfe53eaf428 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 12:09:38 -0600 Subject: [PATCH 39/46] more changes for the new handle --- viennacl/compressed_matrix.hpp | 
2 +- viennacl/coordinate_matrix.hpp | 2 +- viennacl/detail/matrix_def.hpp | 12 +- viennacl/forwards.h | 89 ++- viennacl/linalg/detail/ilu/block_ilu.hpp | 2 +- viennacl/linalg/detail/ilu/common.hpp | 42 +- viennacl/linalg/detail/ilu/ilu0.hpp | 32 +- viennacl/linalg/detail/ilu/ilut.hpp | 32 +- .../linalg/host_based/misc_operations.hpp | 8 +- .../host_based/sparse_matrix_operations.hpp | 8 +- viennacl/linalg/misc_operations.hpp | 8 +- viennacl/linalg/opencl/ilu_operations.hpp | 6 +- viennacl/linalg/opencl/misc_operations.hpp | 8 +- .../opencl/sparse_matrix_operations.hpp | 4 +- viennacl/linalg/sparse_matrix_operations.hpp | 2 +- viennacl/linalg/vector_operations.hpp | 48 +- viennacl/matrix.hpp | 642 +++++++++--------- 17 files changed, 473 insertions(+), 474 deletions(-) diff --git a/viennacl/compressed_matrix.hpp b/viennacl/compressed_matrix.hpp index cdb12f44..92fb25c5 100644 --- a/viennacl/compressed_matrix.hpp +++ b/viennacl/compressed_matrix.hpp @@ -630,7 +630,7 @@ class compressed_matrix { typedef compressed_matrix self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/coordinate_matrix.hpp b/viennacl/coordinate_matrix.hpp index 2a24a4ed..3a9eb074 100644 --- a/viennacl/coordinate_matrix.hpp +++ b/viennacl/coordinate_matrix.hpp @@ -186,7 +186,7 @@ template class coordinate_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/detail/matrix_def.hpp b/viennacl/detail/matrix_def.hpp index 2b594670..b85afe38 100644 --- a/viennacl/detail/matrix_def.hpp +++ b/viennacl/detail/matrix_def.hpp @@ -99,10 +99,10 @@ public: scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx = viennacl::context()) : 
implicit_matrix_base(s1, s2, val, false, ctx) {} }; -template +template class matrix_base { - typedef matrix_base self_type; + typedef matrix_base self_type; public: typedef matrix_iterator iterator1; @@ -111,7 +111,7 @@ public: typedef NumericT cpu_value_type; typedef SizeT size_type; typedef DistanceT difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; /** @brief The default constructor. Does not allocate any memory. */ explicit matrix_base(): size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0), row_major_fixed_(false), row_major_(true) {} @@ -161,8 +161,8 @@ public: matrix_base(const self_type & other); /* Conversion CTOR */ - template - matrix_base(const matrix_base & other); + template + matrix_base(const matrix_base & other); self_type & operator=(const self_type & other); template @@ -249,7 +249,7 @@ public: void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(elements_, new_ctx); } protected: - void set_handle(handle_type const & h); + void set_handle(viennacl::backend::mem_handle<> const & h); void resize(size_type rows, size_type columns, bool preserve = true); private: size_type size1_; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 5877da4c..3fc95e1a 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -401,10 +401,10 @@ namespace viennacl template class matrix_iterator; - template, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> + template class matrix_base; - template, typename F = row_major, unsigned int ALIGNMENT = 1> + template class matrix; template @@ -664,28 +664,27 @@ namespace viennacl viennacl::vector& input2, viennacl::vector& output); - template - viennacl::vector_expression, const vector_base, op_element_binary > - element_prod(vector_base const & v1, vector_base const & v2); - - template - viennacl::vector_expression, 
const vector_base, op_element_binary > - element_div(vector_base const & v1, vector_base const & v2); + template + viennacl::vector_expression, const vector_base, op_element_binary > + element_prod(vector_base const & v1, vector_base const & v2); + template + viennacl::vector_expression, const vector_base, op_element_binary > + element_div(vector_base const & v1, vector_base const & v2); - template - void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result); - template + template void inner_prod_impl(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, scalar & result); - template - void inner_prod_impl(vector_base const & vec1, + template + void inner_prod_impl(vector_base const & vec1, viennacl::vector_expression const & vec2, scalar & result); @@ -697,18 +696,18 @@ namespace viennacl /////////////////////////// - template - void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result); - template + template void inner_prod_cpu(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, T & result); - template - void inner_prod_cpu(vector_base const & vec1, + template + void inner_prod_cpu(vector_base const & vec1, viennacl::vector_expression const & vec2, T & result); @@ -721,16 +720,16 @@ namespace viennacl //forward definition of norm_1_impl function - template - void norm_1_impl(vector_base const & vec, scalar & result); + template + void norm_1_impl(vector_base const & vec, scalar & result); template void norm_1_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_1_cpu(vector_base const & vec, + template + void norm_1_cpu(vector_base const & vec, T & result); template @@ -738,8 +737,8 @@ namespace viennacl S2 & 
result); //forward definition of norm_2_impl function - template - void norm_2_impl(vector_base const & vec, scalar & result); + template + void norm_2_impl(vector_base const & vec, scalar & result); template void norm_2_impl(viennacl::vector_expression const & vec, @@ -754,64 +753,64 @@ namespace viennacl //forward definition of norm_inf_impl function - template - void norm_inf_impl(vector_base const & vec, scalar & result); + template + void norm_inf_impl(vector_base const & vec, scalar & result); template void norm_inf_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_inf_cpu(vector_base const & vec, T & result); + template + void norm_inf_cpu(vector_base const & vec, T & result); template void norm_inf_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of max()-related functions - template - void max_impl(vector_base const & vec, scalar & result); + template + void max_impl(vector_base const & vec, scalar & result); template void max_impl(viennacl::vector_expression const & vec, scalar & result); - template - void max_cpu(vector_base const & vec, T & result); + template + void max_cpu(vector_base const & vec, T & result); template void max_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of min()-related functions - template - void min_impl(vector_base const & vec, scalar & result); + template + void min_impl(vector_base const & vec, scalar & result); template void min_impl(viennacl::vector_expression const & vec, scalar & result); - template - void min_cpu(vector_base const & vec, T & result); + template + void min_cpu(vector_base const & vec, T & result); template void min_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of sum()-related functions - template - void sum_impl(vector_base const & vec, scalar & result); + template + void sum_impl(vector_base const & vec, scalar & result); template void 
sum_impl(viennacl::vector_expression const & vec, scalar & result); - template - void sum_cpu(vector_base const & vec, T & result); + template + void sum_cpu(vector_base const & vec, T & result); template void sum_cpu(viennacl::vector_expression const & vec, diff --git a/viennacl/linalg/detail/ilu/block_ilu.hpp b/viennacl/linalg/detail/ilu/block_ilu.hpp index 1540e2dd..1cb2f4b2 100644 --- a/viennacl/linalg/detail/ilu/block_ilu.hpp +++ b/viennacl/linalg/detail/ilu/block_ilu.hpp @@ -595,7 +595,7 @@ private: ILUTagT tag_; index_vector_type block_indices_; - viennacl::backend::mem_handle gpu_block_indices_; + viennacl::backend::mem_handle<> gpu_block_indices_; viennacl::compressed_matrix gpu_L_trans_; viennacl::compressed_matrix gpu_U_trans_; viennacl::vector gpu_D_; diff --git a/viennacl/linalg/detail/ilu/common.hpp b/viennacl/linalg/detail/ilu/common.hpp index 93b0cba8..cb8b81f5 100644 --- a/viennacl/linalg/detail/ilu/common.hpp +++ b/viennacl/linalg/detail/ilu/common.hpp @@ -50,10 +50,10 @@ namespace detail template void level_scheduling_setup_impl(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list, bool setup_U) { @@ -119,19 +119,19 @@ void level_scheduling_setup_impl(viennacl::compressed_matrix 0) { - row_index_arrays.push_back(viennacl::backend::mem_handle()); + row_index_arrays.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(row_index_arrays.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_row_index_array(row_index_arrays.back(), num_tainted_cols); - row_buffers.push_back(viennacl::backend::mem_handle()); + row_buffers.push_back(viennacl::backend::mem_handle<>()); 
viennacl::backend::switch_memory_context(row_buffers.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_row_buffer(row_buffers.back(), num_tainted_cols + 1); - col_buffers.push_back(viennacl::backend::mem_handle()); + col_buffers.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(col_buffers.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_col_buffer(col_buffers.back(), num_entries); - element_buffers.push_back(viennacl::backend::mem_handle()); + element_buffers.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(element_buffers.back(), viennacl::traits::context(LU)); std::vector elim_elements_buffer(num_entries); @@ -190,10 +190,10 @@ void level_scheduling_setup_impl(viennacl::compressed_matrix void level_scheduling_setup_L(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list) { level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, false); @@ -207,10 +207,10 @@ void level_scheduling_setup_L(viennacl::compressed_matrix template void level_scheduling_setup_U(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list) { level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, true); @@ -222,13 
+222,13 @@ void level_scheduling_setup_U(viennacl::compressed_matrix // template void level_scheduling_substitute(viennacl::vector & vec, - std::list const & row_index_arrays, - std::list const & row_buffers, - std::list const & col_buffers, - std::list const & element_buffers, + std::list> const & row_index_arrays, + std::list> const & row_buffers, + std::list> const & col_buffers, + std::list> const & element_buffers, std::list const & row_elimination_num_list) { - typedef typename std::list< viennacl::backend::mem_handle >::const_iterator ListIterator; + typedef typename std::list< viennacl::backend::mem_handle<> >::const_iterator ListIterator; ListIterator row_index_array_it = row_index_arrays.begin(); ListIterator row_buffers_it = row_buffers.begin(); ListIterator col_buffers_it = col_buffers.begin(); diff --git a/viennacl/linalg/detail/ilu/ilu0.hpp b/viennacl/linalg/detail/ilu/ilu0.hpp index 1c3191a7..67f2f7fc 100644 --- a/viennacl/linalg/detail/ilu/ilu0.hpp +++ b/viennacl/linalg/detail/ilu/ilu0.hpp @@ -304,22 +304,22 @@ private: // // L: - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_index_arrays_.begin(); it != multifrontal_L_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_buffers_.begin(); it != multifrontal_L_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_col_buffers_.begin(); 
it != multifrontal_L_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_element_buffers_.begin(); it != multifrontal_L_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -329,22 +329,22 @@ private: viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_index_arrays_.begin(); it != multifrontal_U_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_buffers_.begin(); it != multifrontal_U_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_col_buffers_.begin(); it != multifrontal_U_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_element_buffers_.begin(); it != multifrontal_U_element_buffers_.end(); ++it) 
viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -354,17 +354,17 @@ private: ilu0_tag tag_; viennacl::compressed_matrix LU_; - std::list multifrontal_L_row_index_arrays_; - std::list multifrontal_L_row_buffers_; - std::list multifrontal_L_col_buffers_; - std::list multifrontal_L_element_buffers_; + std::list> multifrontal_L_row_index_arrays_; + std::list> multifrontal_L_row_buffers_; + std::list> multifrontal_L_col_buffers_; + std::list> multifrontal_L_element_buffers_; std::list multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; - std::list multifrontal_U_row_index_arrays_; - std::list multifrontal_U_row_buffers_; - std::list multifrontal_U_col_buffers_; - std::list multifrontal_U_element_buffers_; + std::list> multifrontal_U_row_index_arrays_; + std::list> multifrontal_U_row_buffers_; + std::list> multifrontal_U_col_buffers_; + std::list> multifrontal_U_element_buffers_; std::list multifrontal_U_row_elimination_num_list_; }; diff --git a/viennacl/linalg/detail/ilu/ilut.hpp b/viennacl/linalg/detail/ilu/ilut.hpp index 9c0dd966..650d4521 100644 --- a/viennacl/linalg/detail/ilu/ilut.hpp +++ b/viennacl/linalg/detail/ilu/ilut.hpp @@ -611,22 +611,22 @@ private: // L: - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_index_arrays_.begin(); it != multifrontal_L_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_buffers_.begin(); it != multifrontal_L_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< 
viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_col_buffers_.begin(); it != multifrontal_L_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_element_buffers_.begin(); it != multifrontal_L_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -636,22 +636,22 @@ private: viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_index_arrays_.begin(); it != multifrontal_U_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_buffers_.begin(); it != multifrontal_U_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_col_buffers_.begin(); it != multifrontal_U_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + for 
(typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_element_buffers_.begin(); it != multifrontal_U_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -663,17 +663,17 @@ private: viennacl::compressed_matrix L_; viennacl::compressed_matrix U_; - std::list multifrontal_L_row_index_arrays_; - std::list multifrontal_L_row_buffers_; - std::list multifrontal_L_col_buffers_; - std::list multifrontal_L_element_buffers_; + std::list> multifrontal_L_row_index_arrays_; + std::list> multifrontal_L_row_buffers_; + std::list> multifrontal_L_col_buffers_; + std::list> multifrontal_L_element_buffers_; std::list multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; - std::list multifrontal_U_row_index_arrays_; - std::list multifrontal_U_row_buffers_; - std::list multifrontal_U_col_buffers_; - std::list multifrontal_U_element_buffers_; + std::list> multifrontal_U_row_index_arrays_; + std::list> multifrontal_U_row_buffers_; + std::list> multifrontal_U_col_buffers_; + std::list> multifrontal_U_element_buffers_; std::list multifrontal_U_row_elimination_num_list_; mutable viennacl::vector x_k_; diff --git a/viennacl/linalg/host_based/misc_operations.hpp b/viennacl/linalg/host_based/misc_operations.hpp index 11061d93..ce1f774f 100644 --- a/viennacl/linalg/host_based/misc_operations.hpp +++ b/viennacl/linalg/host_based/misc_operations.hpp @@ -40,10 +40,10 @@ namespace detail { template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, 
vcl_size_t num_rows ) { diff --git a/viennacl/linalg/host_based/sparse_matrix_operations.hpp b/viennacl/linalg/host_based/sparse_matrix_operations.hpp index 3cb738d1..d535effd 100644 --- a/viennacl/linalg/host_based/sparse_matrix_operations.hpp +++ b/viennacl/linalg/host_based/sparse_matrix_operations.hpp @@ -868,7 +868,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /* block_indices */, vcl_size_t /* num_blocks */, vector_base const & /* L_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_lower_tag) @@ -899,7 +899,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /*block_indices*/, vcl_size_t /* num_blocks */, vector_base const & L_diagonal, vector_base & vec, viennacl::linalg::lower_tag) @@ -935,7 +935,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /*block_indices*/, vcl_size_t /* num_blocks */, vector_base const & /* U_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_upper_tag) @@ -968,7 +968,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /* block_indices */, vcl_size_t /* num_blocks */, vector_base const & U_diagonal, vector_base & vec, viennacl::linalg::upper_tag) diff --git a/viennacl/linalg/misc_operations.hpp b/viennacl/linalg/misc_operations.hpp index 
208573fd..bf42cc9a 100644 --- a/viennacl/linalg/misc_operations.hpp +++ b/viennacl/linalg/misc_operations.hpp @@ -47,10 +47,10 @@ namespace viennacl template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/opencl/ilu_operations.hpp b/viennacl/linalg/opencl/ilu_operations.hpp index 248a88ac..e67b8c53 100644 --- a/viennacl/linalg/opencl/ilu_operations.hpp +++ b/viennacl/linalg/opencl/ilu_operations.hpp @@ -119,7 +119,7 @@ void icc_chow_patel_sweep(compressed_matrix & L, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(L).context()); viennacl::linalg::opencl::kernels::ilu::init(ctx); - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); @@ -218,11 +218,11 @@ void ilu_chow_patel_sweep(compressed_matrix & L, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(L).context()); viennacl::linalg::opencl::kernels::ilu::init(ctx); - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - viennacl::backend::mem_handle U_backup; + viennacl::backend::mem_handle<> U_backup; viennacl::backend::memory_create(U_backup, 
U_trans.handle().raw_size(), viennacl::traits::context(U_trans)); viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size()); diff --git a/viennacl/linalg/opencl/misc_operations.hpp b/viennacl/linalg/opencl/misc_operations.hpp index 83a3db77..38d9d120 100644 --- a/viennacl/linalg/opencl/misc_operations.hpp +++ b/viennacl/linalg/opencl/misc_operations.hpp @@ -43,10 +43,10 @@ namespace detail template void level_scheduling_substitute(vector & x, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/opencl/sparse_matrix_operations.hpp b/viennacl/linalg/opencl/sparse_matrix_operations.hpp index a8d1557b..6e55b2c5 100644 --- a/viennacl/linalg/opencl/sparse_matrix_operations.hpp +++ b/viennacl/linalg/opencl/sparse_matrix_operations.hpp @@ -483,7 +483,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & /* L_diagonal */, //ignored vector_base & x, viennacl::linalg::unit_lower_tag) @@ -506,7 +506,7 @@ namespace detail void block_inplace_solve(matrix_expression, const compressed_matrix, op_trans> const & U, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & U_diagonal, vector_base & x, viennacl::linalg::upper_tag) diff --git 
a/viennacl/linalg/sparse_matrix_operations.hpp b/viennacl/linalg/sparse_matrix_operations.hpp index dccb330c..e6ffdbbb 100644 --- a/viennacl/linalg/sparse_matrix_operations.hpp +++ b/viennacl/linalg/sparse_matrix_operations.hpp @@ -330,7 +330,7 @@ namespace viennacl template typename viennacl::enable_if< viennacl::is_any_sparse_matrix::value>::type block_inplace_solve(const matrix_expression & mat, - viennacl::backend::mem_handle const & block_index_array, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_index_array, vcl_size_t num_blocks, viennacl::vector_base const & mat_diagonal, viennacl::vector_base & vec, SOLVERTAG tag) diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index c8d72ce0..c8a97e65 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -349,25 +349,25 @@ namespace viennacl // Helper macro for generating binary element-wise operations such as element_prod(), element_div(), element_pow() without unnecessary code duplication */ #define VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(OPNAME) \ - template \ - viennacl::vector_expression, const vector_base, op_element_binary > \ - element_##OPNAME(vector_base const & v1, vector_base const & v2) \ + template \ + viennacl::vector_expression, const vector_base, op_element_binary > \ + element_##OPNAME(vector_base const & v1, vector_base const & v2) \ { \ - return viennacl::vector_expression, const vector_base, op_element_binary >(v1, v2); \ + return viennacl::vector_expression, const vector_base, op_element_binary >(v1, v2); \ } \ \ - template \ - viennacl::vector_expression, const vector_base, op_element_binary > \ - element_##OPNAME(vector_expression const & proxy, vector_base const & v2) \ + template \ + viennacl::vector_expression, const vector_base, op_element_binary > \ + element_##OPNAME(vector_expression const & proxy, vector_base const & v2) \ { \ - return viennacl::vector_expression, 
const vector_base, op_element_binary >(proxy, v2); \ + return viennacl::vector_expression, const vector_base, op_element_binary >(proxy, v2); \ } \ \ - template \ - viennacl::vector_expression, const vector_expression, op_element_binary > \ - element_##OPNAME(vector_base const & v1, vector_expression const & proxy) \ + template \ + viennacl::vector_expression, const vector_expression, op_element_binary > \ + element_##OPNAME(vector_base const & v1, vector_expression const & proxy) \ { \ - return viennacl::vector_expression, const vector_expression, op_element_binary >(v1, proxy); \ + return viennacl::vector_expression, const vector_expression, op_element_binary >(v1, proxy); \ } \ \ template >(proxy1, proxy2); \ }\ \ - template \ - viennacl::vector_expression, const T, op_element_binary > \ - element_##OPNAME(vector_base const & v1, T const & alpha) \ + template \ + viennacl::vector_expression, const T, op_element_binary > \ + element_##OPNAME(vector_base const & v1, T const & alpha) \ { \ - return viennacl::vector_expression, const T, op_element_binary >(v1, alpha); \ + return viennacl::vector_expression, const T, op_element_binary >(v1, alpha); \ } \ \ template \ @@ -397,11 +397,11 @@ namespace viennacl return viennacl::vector_expression, const typename viennacl::result_of::cpu_value_type::type, op_element_binary >(proxy, alpha); \ } \ \ - template \ - viennacl::vector_expression, op_element_binary > \ - element_##OPNAME(T const & alpha, vector_base const & v2) \ + template \ + viennacl::vector_expression, op_element_binary > \ + element_##OPNAME(T const & alpha, vector_base const & v2) \ { \ - return viennacl::vector_expression, op_element_binary >(alpha, v2); \ + return viennacl::vector_expression, op_element_binary >(alpha, v2); \ } \ \ template \ @@ -427,11 +427,11 @@ namespace viennacl // Helper macro for generating unary element-wise operations such as element_exp(), element_sin(), etc. 
without unnecessary code duplication */ #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \ - template \ - viennacl::vector_expression, const vector_base, op_element_unary > \ - element_##funcname(vector_base const & v) \ + template \ + viennacl::vector_expression, const vector_base, op_element_unary > \ + element_##funcname(vector_base const & v) \ { \ - return viennacl::vector_expression, const vector_base, op_element_unary >(v, v); \ + return viennacl::vector_expression, const vector_base, op_element_unary >(v, v); \ } \ template \ viennacl::vector_expression, \ diff --git a/viennacl/matrix.hpp b/viennacl/matrix.hpp index c83b6ffd..14c980c3 100644 --- a/viennacl/matrix.hpp +++ b/viennacl/matrix.hpp @@ -123,8 +123,8 @@ private: * @param columns Number of columns * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) */ -template -matrix_base::matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) +template +matrix_base::matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(rows, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(columns, dense_padding_size)), @@ -139,9 +139,9 @@ matrix_base::matrix_base(size_type rows, /** @brief Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_stride */ -template +template template -matrix_base::matrix_base(matrix_expression const & proxy) : +matrix_base::matrix_base(matrix_expression const & proxy) : size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -157,8 +157,8 
@@ matrix_base::matrix_base(matrix_expressio } // CUDA or host memory: -template -matrix_base::matrix_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, +template +matrix_base::matrix_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major) @@ -189,8 +189,8 @@ matrix_base::matrix_base(NumericT * ptr_t } #ifdef VIENNACL_WITH_OPENCL -template -matrix_base::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) +template +matrix_base::matrix_base(cl_mem mem, size_type rows, size_type columns, bool is_row_major, viennacl::context ctx) : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1), @@ -204,8 +204,8 @@ matrix_base::matrix_base(cl_mem mem, size elements_.raw_size(sizeof(NumericT)*internal_size()); } -template -matrix_base::matrix_base(cl_mem mem, viennacl::context ctx, +template +matrix_base::matrix_base(cl_mem mem, viennacl::context ctx, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major) @@ -224,8 +224,8 @@ matrix_base::matrix_base(cl_mem mem, vien #endif // Copy CTOR -template -matrix_base::matrix_base(const matrix_base & other) : +template +matrix_base::matrix_base(const matrix_base & other) : size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -243,7 +243,7 @@ matrix_base::matrix_base(const matrix_bas // Conversion CTOR template template -matrix_base::matrix_base(const matrix_base & other) : 
+matrix_base::matrix_base(const matrix_base & other) : size1_(other.size1()), size2_(other.size2()), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(viennacl::tools::align_to_multiple(size1_, dense_padding_size)), internal_size2_(viennacl::tools::align_to_multiple(size2_, dense_padding_size)), @@ -258,8 +258,8 @@ matrix_base::matrix_base(const matrix_bas } } -template -matrix_base & matrix_base::operator=(const self_type & other) //enables implicit conversions +template +matrix_base & matrix_base::operator=(const self_type & other) //enables implicit conversions { if (&other==this) return *this; @@ -279,9 +279,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator=(const matrix_base & other) +matrix_base & matrix_base::operator=(const matrix_base & other) { if (internal_size() == 0) { @@ -300,9 +300,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator=(const matrix_expression & proxy) +matrix_base & matrix_base::operator=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0) && (viennacl::traits::size2(proxy) == size2() || size2() == 0) @@ -328,8 +328,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator=(const matrix_expression & proxy) +template +matrix_base & matrix_base::operator=(const matrix_expression & proxy) { if ( internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0 ) { @@ -358,9 +358,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator+=(const matrix_expression & proxy) +matrix_base & matrix_base::operator+=(const matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) @@ -373,9 +373,9 @@ matrix_base & matrix_base +template template -matrix_base & matrix_base::operator-=(const matrix_expression & proxy) +matrix_base & matrix_base::operator-=(const 
matrix_expression & proxy) { assert( (viennacl::traits::size1(proxy) == size1()) && (viennacl::traits::size2(proxy) == size2()) @@ -389,8 +389,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator = (identity_matrix const & m) +template +matrix_base & matrix_base::operator = (identity_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -417,8 +417,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator = (zero_matrix const & m) +template +matrix_base & matrix_base::operator = (zero_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -442,8 +442,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator = (scalar_matrix const & m) +template +matrix_base & matrix_base::operator = (scalar_matrix const & m) { assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") ); assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") ); @@ -473,8 +473,8 @@ matrix_base & matrix_base -entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) +template +entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) { if (row_major_) return entry_proxy(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_); @@ -483,8 +483,8 @@ entry_proxy matrix_base::operat /** @brief Read access to a single element of the matrix/matrix_range/matrix_slice */ -template -const_entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) const +template +const_entry_proxy matrix_base::operator()(size_type row_index, size_type col_index) const { if (row_major_) return const_entry_proxy(row_major::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, 
internal_size1(), internal_size2()), elements_); @@ -494,8 +494,8 @@ const_entry_proxy matrix_base:: // // Operator overloads for enabling implicit conversions: // -template -matrix_base & matrix_base::operator += (const matrix_base & other) +template +matrix_base & matrix_base::operator += (const matrix_base & other) { viennacl::linalg::ambm(*this, *this, NumericT(1.0), 1, false, false, @@ -503,8 +503,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator -= (const matrix_base & other) +template +matrix_base & matrix_base::operator -= (const matrix_base & other) { viennacl::linalg::ambm(*this, *this, NumericT(1.0), 1, false, false, @@ -513,8 +513,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (char val) +template +matrix_base & matrix_base::operator *= (char val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -522,8 +522,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (short val) +template +matrix_base & matrix_base::operator *= (short val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -531,8 +531,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (int val) +template +matrix_base & matrix_base::operator *= (int val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -540,8 +540,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (long val) +template +matrix_base & matrix_base::operator *= (long val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -549,8 +549,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (float val) +template +matrix_base & matrix_base::operator *= (float val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, false, false); @@ -558,8 +558,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator *= (double val) +template +matrix_base & matrix_base::operator *= (double val) { viennacl::linalg::am(*this, *this, 
NumericT(val), 1, false, false); @@ -569,8 +569,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (char val) +template +matrix_base & matrix_base::operator /= (char val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -578,8 +578,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (short val) +template +matrix_base & matrix_base::operator /= (short val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -587,8 +587,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (int val) +template +matrix_base & matrix_base::operator /= (int val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -596,8 +596,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (long val) +template +matrix_base & matrix_base::operator /= (long val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -605,8 +605,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (float val) +template +matrix_base & matrix_base::operator /= (float val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -614,8 +614,8 @@ matrix_base & matrix_base -matrix_base & matrix_base::operator /= (double val) +template +matrix_base & matrix_base::operator /= (double val) { viennacl::linalg::am(*this, *this, NumericT(val), 1, true, false); @@ -624,18 +624,18 @@ matrix_base & matrix_base -matrix_expression, const NumericT, op_mult> matrix_base::operator-() const +template +matrix_expression, const NumericT, op_mult> matrix_base::operator-() const { return matrix_expression(*this, NumericT(-1)); } -template -void matrix_base::clear() { viennacl::linalg::matrix_assign(*this, NumericT(0), true); } +template +void matrix_base::clear() { viennacl::linalg::matrix_assign(*this, NumericT(0), true); } -template -void matrix_base::resize(size_type rows, size_type columns, bool preserve) +template +void matrix_base::resize(size_type rows, 
size_type columns, bool preserve) { assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!")); @@ -692,11 +692,11 @@ void matrix_base::resize(size_type rows, * @tparam F Storage layout: Either row_major or column_major * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. */ -template -class matrix : public matrix_base +template +class matrix : public matrix_base { - typedef matrix self_type; - typedef matrix_base base_type; + typedef matrix self_type; + typedef matrix_base base_type; public: typedef typename base_type::size_type size_type; @@ -824,10 +824,10 @@ public: * @param s STL output stream * @param gpu_matrix A dense ViennaCL matrix */ -template -std::ostream & operator<<(std::ostream & s, const matrix_base & gpu_matrix) +template +std::ostream & operator<<(std::ostream & s, const matrix_base & gpu_matrix) { - typedef typename matrix_base::size_type size_type; + typedef typename matrix_base::size_type size_type; std::vector tmp(gpu_matrix.internal_size()); viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * gpu_matrix.internal_size(), &(tmp[0])); @@ -872,11 +872,11 @@ std::ostream & operator<<(std::ostream & s, const matrix_expression -matrix_expression< const matrix_base, const matrix_base, op_trans> +template +matrix_expression< const matrix_base, const matrix_base, op_trans> trans(const matrix_base & mat) { - return matrix_expression< const matrix_base, const matrix_base, op_trans>(mat, mat); + return matrix_expression< const matrix_base, const matrix_base, op_trans>(mat, mat); } /** @brief Returns an expression template class representing the transposed matrix expression */ @@ -890,34 +890,34 @@ trans(const matrix_expression & proxy) } //diag(): -template -vector_expression< const matrix_base, const int, 
op_matrix_diag> +template +vector_expression< const matrix_base, const int, op_matrix_diag> diag(const matrix_base & A, int k = 0) { - return vector_expression< const matrix_base, const int, op_matrix_diag>(A, k); + return vector_expression< const matrix_base, const int, op_matrix_diag>(A, k); } -template -matrix_expression< const vector_base, const int, op_vector_diag> +template +matrix_expression< const vector_base, const int, op_vector_diag> diag(const vector_base & v, int k = 0) { - return matrix_expression< const vector_base, const int, op_vector_diag>(v, k); + return matrix_expression< const vector_base, const int, op_vector_diag>(v, k); } // row(): -template -vector_expression< const matrix_base, const unsigned int, op_row> -row(const matrix_base & A, unsigned int i) +template +vector_expression< const matrix_base, const unsigned int, op_row> +row(const matrix_base & A, unsigned int i) { - return vector_expression< const matrix_base, const unsigned int, op_row>(A, i); + return vector_expression< const matrix_base, const unsigned int, op_row>(A, i); } // column(): -template -vector_expression< const matrix_base, const unsigned int, op_column> -column(const matrix_base & A, unsigned int j) +template +vector_expression< const matrix_base, const unsigned int, op_column> +column(const matrix_base & A, unsigned int j) { - return vector_expression< const matrix_base, const unsigned int, op_column>(A, j); + return vector_expression< const matrix_base, const unsigned int, op_column>(A, j); } /////////////////////// transfer operations: ////////////////////////////////////// @@ -930,11 +930,11 @@ column(const matrix_base & A, unsigned int j) * @param cpu_matrix A dense matrix on the host. Type requirements: .size1() returns number of rows, .size2() returns number of columns. 
Access to entries via operator() * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(const CPUMatrixT & cpu_matrix, - matrix & gpu_matrix ) + matrix & gpu_matrix ) { - typedef typename matrix::size_type size_type; + typedef typename matrix::size_type size_type; //std::cout << "Copying CPUMatrixT!" << std::endl; //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl; @@ -966,11 +966,11 @@ void copy(const CPUMatrixT & cpu_matrix, * @param cpu_matrix A dense matrix on the host of type std::vector< std::vector<> >. cpu_matrix[i][j] returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(const std::vector< std::vector, A2> & cpu_matrix, - matrix & gpu_matrix ) + matrix & gpu_matrix ) { - typedef typename matrix::size_type size_type; + typedef typename matrix::size_type size_type; if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0) { @@ -1006,10 +1006,10 @@ void copy(const std::vector< std::vector, A2> & cpu_matrix, * @param cpu_matrix_end Pointer past the last matrix entry. Cf. iterator concept in STL * @param gpu_matrix A dense ViennaCL matrix */ -template +template void fast_copy(NumericT * cpu_matrix_begin, NumericT * cpu_matrix_end, - matrix & gpu_matrix) + matrix & gpu_matrix) { if (gpu_matrix.internal_size() == 0) viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * static_cast(cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin); @@ -1026,11 +1026,11 @@ void fast_copy(NumericT * cpu_matrix_begin, * @param arma_matrix A dense MTL matrix. 
cpu_matrix(i, j) returns the element in the i-th row and j-th columns (both starting with zero) * @param gpu_matrix A dense ViennaCL matrix */ -template +template void copy(arma::Mat const & arma_matrix, - viennacl::matrix & vcl_matrix) + viennacl::matrix & vcl_matrix) { - typedef typename viennacl::matrix::size_type size_type; + typedef typename viennacl::matrix::size_type size_type; if (vcl_matrix.size1() == 0 || vcl_matrix.size2() == 0) { @@ -1195,8 +1195,8 @@ void copy(const matrix & gpu_matrix, * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix A dense memory on the host using STL types, typically std::vector< std::vector<> > Must have at least as many rows and columns as the gpu_matrix! Type requirement: Access to entries via operator() */ -template -void copy(const matrix & gpu_matrix, +template +void copy(const matrix & gpu_matrix, std::vector< std::vector, A2> & cpu_matrix) { typedef typename matrix::size_type size_type; @@ -1227,8 +1227,8 @@ void copy(const matrix & gpu_matrix, * @param gpu_matrix A dense ViennaCL matrix * @param cpu_matrix_begin Pointer to the output memory on the CPU. User must ensure that provided memory is large enough. 
*/ -template -void fast_copy(const matrix & gpu_matrix, +template +void fast_copy(const matrix & gpu_matrix, NumericT * cpu_matrix_begin) { viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)*gpu_matrix.internal_size(), cpu_matrix_begin); @@ -1258,12 +1258,12 @@ operator + (matrix_expression const & proxy1, } template + typename NumericT> matrix_expression< const matrix_expression, -const matrix_base, +const matrix_base, op_add> operator + (matrix_expression const & proxy1, - matrix_base const & proxy2) + matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) @@ -1273,12 +1273,12 @@ operator + (matrix_expression const & proxy1, op_add>(proxy1, proxy2); } -template -matrix_expression< const matrix_base, +matrix_expression< const matrix_base, const matrix_expression, op_add> -operator + (matrix_base const & proxy1, +operator + (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) @@ -1318,44 +1318,44 @@ operator - (matrix_expression const & proxy1, } template + typename NumericT> matrix_expression< const matrix_expression, -const matrix_base, +const matrix_base, op_sub> operator - (matrix_expression const & proxy1, - matrix_base const & proxy2) + matrix_base const & proxy2) { assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); return matrix_expression< const matrix_expression, - const matrix_base, + const matrix_base, op_sub>(proxy1, proxy2); } -template -matrix_expression< const matrix_base, +matrix_expression< const matrix_base, const matrix_expression, op_sub> -operator - (matrix_base const & proxy1, +operator - (matrix_base const & proxy1, matrix_expression const & proxy2) { assert( 
(viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2)) && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2)) && bool("Incompatible matrix sizes!")); - return matrix_expression< const matrix_base, + return matrix_expression< const matrix_base, const matrix_expression, op_sub>(proxy1, proxy2); } /** @brief Operator overload for m1 - m2, where m1 and m2 are either dense matrices, matrix ranges, or matrix slices. No mixing of different storage layouts allowed at the moment. */ -template -matrix_expression< const matrix_base, const matrix_base, op_sub > -operator - (const matrix_base & m1, const matrix_base & m2) +template +matrix_expression< const matrix_base, const matrix_base, op_sub > +operator - (const matrix_base & m1, const matrix_base & m2) { - return matrix_expression< const matrix_base, - const matrix_base, + return matrix_expression< const matrix_base, + const matrix_base, op_sub > (m1, m2); } @@ -1377,51 +1377,51 @@ operator * (S1 const & value, matrix_base const & m1) } /** @brief Operator overload for the expression alpha * m1, where alpha is a char (8-bit integer) */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (char value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (char value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a short integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (short value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (short value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, 
NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is an integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (int value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (int value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a long integer */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (long value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (long value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a single precision floating point value */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (float value, matrix_base const & m1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (float value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } /** @brief Operator overload for the expression alpha * m1, where alpha is a double precision floating point value */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (double value, matrix_base const & m1) +template 
+matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (double value, matrix_base const & m1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(value)); } @@ -1457,69 +1457,69 @@ operator * (S1 const & val, /** @brief Scales the matrix by a GPU scalar 'alpha' and returns an expression template */ -template +template typename viennacl::enable_if< viennacl::is_any_scalar::value, -matrix_expression< const matrix_base, const S1, op_mult> >::type -operator * (matrix_base const & m1, S1 const & s1) +matrix_expression< const matrix_base, const S1, op_mult> >::type +operator * (matrix_base const & m1, S1 const & s1) { - return matrix_expression< const matrix_base, const S1, op_mult>(m1, s1); + return matrix_expression< const matrix_base, const S1, op_mult>(m1, s1); } /** @brief Scales the matrix by a char (8-bit integer) 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, char s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, char s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a short integer 'alpha' and returns an expression template. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, short s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, short s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by an integer 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, int s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, int s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a long integer 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, long s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, long s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a single precision floating point number 'alpha' and returns an expression template. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, float s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, float s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } /** @brief Scales the matrix by a double precision floating point number 'alpha' and returns an expression template. */ -template -matrix_expression< const matrix_base, const NumericT, op_mult> -operator * (matrix_base const & m1, double s1) +template +matrix_expression< const matrix_base, const NumericT, op_mult> +operator * (matrix_base const & m1, double s1) { - return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_mult>(m1, NumericT(s1)); } // operator *= /** @brief Scales a matrix by a GPU scalar value */ -template -typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type -operator *= (matrix_base & m1, S1 const & gpu_val) +template +typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type +operator *= (matrix_base & m1, S1 const & gpu_val) { bool is_sign_flip = viennacl::is_flip_sign_scalar::value; viennacl::linalg::am(m1, @@ -1528,9 +1528,9 @@ operator *= (matrix_base & m1, S1 const & gpu_val) } /** @brief Scales a matrix by a char (8-bit) value. */ -template -matrix_base & -operator *= (matrix_base & m1, char gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, char gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1538,9 +1538,9 @@ operator *= (matrix_base & m1, char gpu_val) } /** @brief Scales a matrix by a short integer value. 
*/ -template -matrix_base & -operator *= (matrix_base & m1, short gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, short gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1548,9 +1548,9 @@ operator *= (matrix_base & m1, short gpu_val) } /** @brief Scales a matrix by an integer value. */ -template -matrix_base & -operator *= (matrix_base & m1, int gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, int gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1558,9 +1558,9 @@ operator *= (matrix_base & m1, int gpu_val) } /** @brief Scales a matrix by a long integer value. */ -template -matrix_base & -operator *= (matrix_base & m1, long gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, long gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1568,9 +1568,9 @@ operator *= (matrix_base & m1, long gpu_val) } /** @brief Scales a matrix by a single precision floating point value. */ -template -matrix_base & -operator *= (matrix_base & m1, float gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, float gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1578,9 +1578,9 @@ operator *= (matrix_base & m1, float gpu_val) } /** @brief Scales a matrix by a double precision floating point value. 
*/ -template -matrix_base & -operator *= (matrix_base & m1, double gpu_val) +template +matrix_base & +operator *= (matrix_base & m1, double gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, false, false); @@ -1608,33 +1608,33 @@ operator / (matrix_expression const & proxy, /** @brief Returns an expression template for scaling the matrix by a GPU scalar 'alpha' */ -template +template typename viennacl::enable_if< viennacl::is_any_scalar::value, -matrix_expression< const matrix_base, const S1, op_div> >::type -operator / (matrix_base const & m1, S1 const & s1) +matrix_expression< const matrix_base, const S1, op_div> >::type +operator / (matrix_base const & m1, S1 const & s1) { - return matrix_expression< const matrix_base, const S1, op_div>(m1, s1); + return matrix_expression< const matrix_base, const S1, op_div>(m1, s1); } /** @brief Returns an expression template for scaling the matrix by a char (8-bit integer) 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, char s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, char s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a short integer 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, short s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, short s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by an integer 'alpha'. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_div> +template +matrix_expression< const matrix_base, const NumericT, op_div> operator / (matrix_base const & m1, int s1) { return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); @@ -1642,26 +1642,26 @@ operator / (matrix_base const & m1, int s1) /** @brief Returns an expression template for scaling the matrix by a long integer 'alpha'. */ template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, long s1) +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, long s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a single precision floating point number 'alpha'. */ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, float s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, float s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } /** @brief Returns an expression template for scaling the matrix by a double precision floating point number 'alpha'. 
*/ -template -matrix_expression< const matrix_base, const NumericT, op_div> -operator / (matrix_base const & m1, double s1) +template +matrix_expression< const matrix_base, const NumericT, op_div> +operator / (matrix_base const & m1, double s1) { - return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); + return matrix_expression< const matrix_base, const NumericT, op_div>(m1, NumericT(s1)); } @@ -1669,9 +1669,9 @@ operator / (matrix_base const & m1, double s1) // operator /= /** @brief Scales a matrix by a GPU scalar value */ -template -typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type -operator /= (matrix_base & m1, S1 const & gpu_val) +template +typename viennacl::enable_if< viennacl::is_scalar::value, matrix_base & >::type +operator /= (matrix_base & m1, S1 const & gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1679,9 +1679,9 @@ operator /= (matrix_base & m1, S1 const & gpu_val) } /** @brief Scales a matrix by a char (8-bit integer) value */ -template -matrix_base & -operator /= (matrix_base & m1, char gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, char gpu_val) { viennacl::linalg::am(m1, m1, NumericT(gpu_val), 1, true, false); @@ -1689,9 +1689,9 @@ operator /= (matrix_base & m1, char gpu_val) } /** @brief Scales a matrix by a short integer value */ -template -matrix_base & -operator /= (matrix_base & m1, short gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, short gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1699,9 +1699,9 @@ operator /= (matrix_base & m1, short gpu_val) } /** @brief Scales a matrix by an integer value */ -template -matrix_base & -operator /= (matrix_base & m1, int gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, int gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1709,9 +1709,9 @@ operator /= (matrix_base & m1, int gpu_val) } /** @brief Scales a matrix by a 
long integer value */ -template -matrix_base & -operator /= (matrix_base & m1, long gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, long gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1719,9 +1719,9 @@ operator /= (matrix_base & m1, long gpu_val) } /** @brief Scales a matrix by a single precision floating point value */ -template -matrix_base & -operator /= (matrix_base & m1, float gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, float gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1729,9 +1729,9 @@ operator /= (matrix_base & m1, float gpu_val) } /** @brief Scales a matrix by a double precision floating point value */ -template -matrix_base & -operator /= (matrix_base & m1, double gpu_val) +template +matrix_base & +operator /= (matrix_base & m1, double gpu_val) { viennacl::linalg::am(m1, m1, gpu_val, 1, true, false); @@ -1743,59 +1743,59 @@ operator /= (matrix_base & m1, double gpu_val) // outer_prod(v1, v2) * val; -template +template typename viennacl::enable_if< viennacl::is_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type -operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, +operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } -template +template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const 
vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type -operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, +operator*(const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy, const S1 & val) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } // val * outer_prod(v1, v2); -template +template typename viennacl::enable_if< viennacl::is_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult> >::type operator*(const S1 & val, - const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) + const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const S1, op_mult>(proxy, val); } -template +template typename viennacl::enable_if< viennacl::is_cpu_scalar::value, -viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, +viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult> >::type operator*(const S1 & val, - const viennacl::matrix_expression< const vector_base, const vector_base, 
op_prod> & proxy) + const viennacl::matrix_expression< const vector_base, const vector_base, op_prod> & proxy) { - return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, + return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base, const vector_base, op_prod>, const NumericT, op_mult>(proxy, NumericT(val)); } @@ -1814,18 +1814,18 @@ namespace detail { // x = y - template - struct op_executor, op_assign, matrix_base > + template + struct op_executor, op_assign, matrix_base > { - static void apply(matrix_base & lhs, matrix_base const & rhs) + static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::am(lhs, rhs, T(1), 1, false, false); } }; // x = trans(y) - template - struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans> > + template + struct op_executor, op_assign, matrix_expression, const matrix_base, op_trans> > { static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) { @@ -1850,31 +1850,31 @@ namespace detail // x += y - template - struct op_executor, op_inplace_add, matrix_base > + template + struct op_executor, op_inplace_add, matrix_base > { - static void apply(matrix_base & lhs, matrix_base const & rhs) + static void apply(matrix_base & lhs, matrix_base const & rhs) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false); } }; // x += trans(y) - template - struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans> > + template + struct op_executor, op_inplace_add, matrix_expression, const matrix_base, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) + static void apply(matrix_base & lhs, matrix_expression, const matrix_base, op_trans> const & rhs) { - matrix_base temp(rhs); + matrix_base temp(rhs); viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, 
temp, T(1), 1, false, false); } }; // x += trans(expr) - template - struct op_executor, op_inplace_add, matrix_expression, const matrix_expression, op_trans> > + template + struct op_executor, op_inplace_add, matrix_expression, const matrix_expression, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, + static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_trans> const & rhs) { @@ -1906,10 +1906,10 @@ namespace detail }; // x -= trans(expr) - template - struct op_executor, op_inplace_sub, matrix_expression, const matrix_expression, op_trans> > + template + struct op_executor, op_inplace_sub, matrix_expression, const matrix_expression, op_trans> > { - static void apply(matrix_base & lhs, matrix_expression, + static void apply(matrix_base & lhs, matrix_expression, const matrix_expression, op_trans> const & rhs) { @@ -1923,30 +1923,30 @@ namespace detail // x = alpha * y - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x += alpha * y - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false); } }; // x -= alpha * y - template - struct op_executor, op_inplace_sub, 
matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true); } @@ -1956,10 +1956,10 @@ namespace detail ///////////// x OP vec_expr * alpha //////////////////////// // x = alpha * vec_expr - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { if (lhs.row_major()) { @@ -1968,7 +1968,7 @@ namespace detail } else { - matrix, column_major> temp(proxy.lhs()); + matrix temp(proxy.lhs()); lhs = temp * proxy.rhs(); } } @@ -1994,10 +1994,10 @@ namespace detail }; // x -= alpha * vec_expr - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_mult> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_mult> const & proxy) { if (lhs.row_major()) { @@ -2006,7 +2006,7 @@ namespace detail } else { - matrix, column_major> temp(proxy.lhs()); + matrix temp(proxy.lhs()); lhs -= temp * proxy.rhs(); } } @@ -2016,30 +2016,30 @@ namespace detail ///////////// x OP y / alpha //////////////////////// // x = y / alpha - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > + template + struct 
op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x += y / alpha - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false); } }; // x -= y / alpha - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true); } @@ -2049,10 +2049,10 @@ namespace detail ///////////// x OP vec_expr / alpha //////////////////////// // x = vec_expr / alpha - template - struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_assign, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2061,17 +2061,17 @@ namespace detail } else { - matrix, column_major> 
temp(proxy.lhs()); + matrix temp(proxy.lhs()); lhs = temp / proxy.rhs(); } } }; // x += vec_expr / alpha - template - struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_add, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2080,17 +2080,17 @@ namespace detail } else { - matrix, column_major> temp(proxy.lhs()); + matrix temp(proxy.lhs()); lhs += temp / proxy.rhs(); } } }; // x -= vec_expr / alpha - template - struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > + template + struct op_executor, op_inplace_sub, matrix_expression, const ScalarType, op_div> > { - static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) + static void apply(matrix_base & lhs, matrix_expression, const ScalarType, op_div> const & proxy) { if (lhs.row_major()) { @@ -2099,7 +2099,7 @@ namespace detail } else { - matrix, column_major> temp(proxy.lhs()); + matrix temp(proxy.lhs()); lhs -= temp / proxy.rhs(); } } @@ -2108,12 +2108,12 @@ namespace detail // generic x = vec_expr1 + vec_expr2: - template - struct op_executor, op_assign, matrix_expression > + template + struct op_executor, op_assign, matrix_expression > { // generic x = vec_expr1 + vec_expr2: template - static void apply(matrix_base & lhs, matrix_expression const & proxy) + static void apply(matrix_base & lhs, matrix_expression const & proxy) { bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs()); bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs()); -- GitLab From e912aa8b17f176c7f3dbbd9794d3b0e2582c3fe5 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 12:45:48 -0600 Subject: [PATCH 40/46] wraps the debug statements within debug macros --- 
viennacl/linalg/opencl/vector_operations.hpp | 2 -- viennacl/ocl/context.hpp | 10 ++++++++-- viennacl/ocl/mempool/mempool.hpp | 7 ++++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 69810d35..2b342345 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -807,7 +807,6 @@ void norm_reduction_impl(vector_base const & vec, viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector::program_name(), "norm"); assert( (k.global_work_size() / k.local_work_size() <= partial_result.size()) && bool("Size mismatch for partial reduction in norm_reduction_impl()") ); - std::cout << "Computing norm of " << viennacl::traits::opencl_handle(vec).get() << std::endl; viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec), cl_uint(viennacl::traits::start(vec)), @@ -928,7 +927,6 @@ template void norm_2_cpu(vector_base const & vec, T & result) { - std::cout << "norm_2_kernel asked for a vector.\n"; vcl_size_t work_groups = 128; viennacl::vector temp(work_groups, viennacl::traits::context(vec)); diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index bc8a2e9b..796750f1 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -95,7 +95,9 @@ namespace ocl { cl_int err = clReleaseMemObject(p); VIENNACL_ERR_CHECK(err); - std :: cout << "[allocator]: deallocation memory: " << p << std::endl; +#ifdef VIENNACL_DEBUG_ALL + std :: cout << "[allocator]: deallocating memory: " << p << std::endl; +#endif } virtual cl_allocator_base *copy() const = 0; @@ -302,9 +304,14 @@ public: { if(use_mempool){ +#ifdef VIENNACL_DEBUG_ALL std::cout << "[mempool]: querying for memory\n"; +#endif cl_mem mem = get_mempool()->allocate(size); +#ifdef VIENNACL_DEBUG_ALL std::cout << "[mempool]: gave memory at: " << mem << std::endl; + +#endif return mem; } #if defined(VIENNACL_DEBUG_ALL) 
|| defined(VIENNACL_DEBUG_CONTEXT) @@ -314,7 +321,6 @@ public: flags |= CL_MEM_COPY_HOST_PTR; cl_int err; cl_mem mem = clCreateBuffer(h_.get(), flags, size, ptr, &err); - std::cout << "[viennacl]: created a buffer: " << mem << std::endl; VIENNACL_ERR_CHECK(err); return mem; } diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 8fef3def..51b365f5 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -194,9 +194,10 @@ namespace ocl void free(cl_mem p, size_type size) { - +#ifdef VIENNACL_DEBUG_ALL std::cout << "[mempool]: freeing the memory " << p << ". So that it could be used again."<< std::endl; +#endif --m_active_blocks; bin_nr_t bin_nr = bin_number(size); @@ -267,7 +268,9 @@ namespace ocl void increment_ref_counter(cl_mem p, size_type s) { +#ifdef VIENNACL_DEBUG_ALL std::cout << "[mempool]: Incrementing for " << p << std::endl; +#endif if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; @@ -279,7 +282,9 @@ namespace ocl void decrement_ref_counter(cl_mem p, size_type s) { +#ifdef VIENNACL_DEBUG_ALL std::cout << "[mempool]: Decrementing for " << p << std::endl; +#endif if(m_reference_count.find(p) == m_reference_count.end()) { std::cerr << "Did not find a memory to reference count.\n"; -- GitLab From 1ebd59da33053d04db1ce0e2353f51e96de0528f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 16:09:19 -0600 Subject: [PATCH 41/46] makes the template parameter of any hanging backend::mem_handle to default --- libviennacl/src/blas1.cpp | 28 ++++++++++++++-------------- libviennacl/src/blas2.cpp | 16 ++++++++-------- libviennacl/src/blas3.cpp | 10 +++++----- libviennacl/src/init_matrix.hpp | 8 ++++---- libviennacl/src/init_vector.hpp | 8 ++++---- viennacl/ell_matrix.hpp | 2 +- viennacl/forwards.h | 6 ++++-- viennacl/hyb_matrix.hpp | 2 +- viennacl/ocl/forwards.h | 2 ++ viennacl/ocl/handle.hpp | 5 +++-- 10 files 
changed, 46 insertions(+), 41 deletions(-) diff --git a/libviennacl/src/blas1.cpp b/libviennacl/src/blas1.cpp index a7319d57..7a69fd6b 100644 --- a/libviennacl/src/blas1.cpp +++ b/libviennacl/src/blas1.cpp @@ -39,7 +39,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLiamax(ViennaCLInt *index, ViennaCLVector x) { - viennacl::backend::mem_handle v1_handle; + viennacl::backend::mem_handle<> v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -79,7 +79,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLasum(ViennaCLHostScalar *alpha if ((*alpha)->precision != x->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; + viennacl::backend::mem_handle<> v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -121,8 +121,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLaxpy(ViennaCLHostScalar alpha, if (x->precision != y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -165,8 +165,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLcopy(ViennaCLVector x, ViennaC if (x->precision != y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -211,8 +211,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLdot(ViennaCLHostScalar *alpha, if (x->precision != y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + 
viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -254,7 +254,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLnrm2(ViennaCLHostScalar *alpha if ((*alpha)->precision != x->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; + viennacl::backend::mem_handle<> v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -300,8 +300,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLrot(ViennaCLVector x, Vien if (x->precision != y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -343,7 +343,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLscal(ViennaCLHostScalar alpha, if (alpha->precision != x->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; + viennacl::backend::mem_handle<> v1_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -381,8 +381,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLswap(ViennaCLVector x, ViennaC if (x->precision != y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/blas2.cpp b/libviennacl/src/blas2.cpp index bc2c0952..d5014d28 100644 --- a/libviennacl/src/blas2.cpp +++ b/libviennacl/src/blas2.cpp @@ -35,9 +35,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLVector x, ViennaCLHostScalar beta, ViennaCLVector y) { - 
viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -100,8 +100,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaCLVector x, ViennaCLUplo uplo) { - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -176,9 +176,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaC VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLger(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y, ViennaCLMatrix A) { - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/blas3.cpp b/libviennacl/src/blas3.cpp index bb6e03eb..f61c00b0 100644 --- a/libviennacl/src/blas3.cpp +++ b/libviennacl/src/blas3.cpp @@ -34,9 +34,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) { - viennacl::backend::mem_handle A_handle; - viennacl::backend::mem_handle B_handle; - viennacl::backend::mem_handle C_handle; + viennacl::backend::mem_handle<> A_handle; + viennacl::backend::mem_handle<> B_handle; + viennacl::backend::mem_handle<> C_handle; if 
(init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -117,8 +117,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B) { - viennacl::backend::mem_handle A_handle; - viennacl::backend::mem_handle B_handle; + viennacl::backend::mem_handle<> A_handle; + viennacl::backend::mem_handle<> B_handle; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/init_matrix.hpp b/libviennacl/src/init_matrix.hpp index e463e880..c461c57e 100644 --- a/libviennacl/src/init_matrix.hpp +++ b/libviennacl/src/init_matrix.hpp @@ -20,7 +20,7 @@ -static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_CUDA h.switch_active_handle_id(viennacl::CUDA_MEMORY); @@ -41,7 +41,7 @@ static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, Vienna #endif } -static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_OPENCL h.switch_active_handle_id(viennacl::OPENCL_MEMORY); @@ -63,7 +63,7 @@ static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, Vien } -static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { h.switch_active_handle_id(viennacl::MAIN_MEMORY); h.ram_handle().reset(A->host_mem); @@ -79,7 +79,7 @@ static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, Vienna } -static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static 
ViennaCLStatus init_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { switch (A->backend->backend_type) { diff --git a/libviennacl/src/init_vector.hpp b/libviennacl/src/init_vector.hpp index 8be00d73..2ae9ebcc 100644 --- a/libviennacl/src/init_vector.hpp +++ b/libviennacl/src/init_vector.hpp @@ -20,7 +20,7 @@ -static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { #ifdef VIENNACL_WITH_CUDA h.switch_active_handle_id(viennacl::CUDA_MEMORY); @@ -41,7 +41,7 @@ static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, Vienna #endif } -static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { #ifdef VIENNACL_WITH_OPENCL h.switch_active_handle_id(viennacl::OPENCL_MEMORY); @@ -63,7 +63,7 @@ static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, Vien } -static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { h.switch_active_handle_id(viennacl::MAIN_MEMORY); h.ram_handle().reset(x->host_mem); @@ -79,7 +79,7 @@ static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, Vienna } -static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { switch (x->backend->backend_type) { diff --git a/viennacl/ell_matrix.hpp b/viennacl/ell_matrix.hpp index 3c3a4282..fc584219 100644 --- a/viennacl/ell_matrix.hpp +++ b/viennacl/ell_matrix.hpp @@ -53,7 +53,7 @@ template handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 
3fc95e1a..8e8a4e56 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -252,6 +252,8 @@ namespace viennacl namespace ocl { template class handle; + + class pooled_clmem_handle; } //forward declaration of basic types: @@ -825,8 +827,8 @@ namespace viennacl void norm_frobenius_cpu(matrix_base const & vec, T & result); - template - vcl_size_t index_norm_inf(vector_base const & vec); + template + vcl_size_t index_norm_inf(vector_base const & vec); template vcl_size_t index_norm_inf(viennacl::vector_expression const & vec); diff --git a/viennacl/hyb_matrix.hpp b/viennacl/hyb_matrix.hpp index e93ede5f..d54f6db5 100644 --- a/viennacl/hyb_matrix.hpp +++ b/viennacl/hyb_matrix.hpp @@ -38,7 +38,7 @@ template handle_type; typedef scalar::ResultType> value_type; hyb_matrix() : csr_threshold_(NumericT(0.8)), rows_(0), cols_(0) {} diff --git a/viennacl/ocl/forwards.h b/viennacl/ocl/forwards.h index 5c7d0699..faa0d30a 100644 --- a/viennacl/ocl/forwards.h +++ b/viennacl/ocl/forwards.h @@ -49,6 +49,8 @@ namespace viennacl template class handle; + class pooled_clmem_handle; + template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 9fe0db0d..e4e5f6ba 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -281,8 +281,9 @@ namespace viennacl return *this; } - virtual void inc(); - virtual void dec(); + inline virtual void inc(); + inline virtual void dec(); + virtual ~pooled_clmem_handle() { if (h_!=0) dec(); } -- GitLab From 362615cca4699efbdae60c2621755d9abbcf6b70 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 16:32:18 -0600 Subject: [PATCH 42/46] makes the template parameter of any hanging backend::mem_handle to default --- viennacl/circulant_matrix.hpp | 2 +- viennacl/compressed_compressed_matrix.hpp | 2 +- viennacl/hankel_matrix.hpp | 2 +- viennacl/linalg/cuda/common.hpp | 4 ++-- viennacl/linalg/cuda/ilu_operations.hpp | 6 +++--- 
viennacl/linalg/cuda/misc_operations.hpp | 8 ++++---- viennacl/linalg/cuda/sparse_matrix_operations.hpp | 4 ++-- viennacl/linalg/cuda/vector_operations.hpp | 4 ++-- viennacl/linalg/mixed_precision_cg.hpp | 6 +++--- viennacl/linalg/vector_operations.hpp | 8 ++++---- viennacl/sliced_ell_matrix.hpp | 2 +- viennacl/toeplitz_matrix.hpp | 2 +- viennacl/traits/size.hpp | 4 ++-- viennacl/vandermonde_matrix.hpp | 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/viennacl/circulant_matrix.hpp b/viennacl/circulant_matrix.hpp index 1ee13d50..a19fa80c 100644 --- a/viennacl/circulant_matrix.hpp +++ b/viennacl/circulant_matrix.hpp @@ -41,7 +41,7 @@ template class circulant_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/compressed_compressed_matrix.hpp b/viennacl/compressed_compressed_matrix.hpp index f1719a2c..ff3e5e68 100644 --- a/viennacl/compressed_compressed_matrix.hpp +++ b/viennacl/compressed_compressed_matrix.hpp @@ -265,7 +265,7 @@ template class compressed_compressed_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/hankel_matrix.hpp b/viennacl/hankel_matrix.hpp index 084e6c87..e50e04b8 100644 --- a/viennacl/hankel_matrix.hpp +++ b/viennacl/hankel_matrix.hpp @@ -43,7 +43,7 @@ template class hankel_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/linalg/cuda/common.hpp b/viennacl/linalg/cuda/common.hpp index 562d558c..52811867 100644 --- a/viennacl/linalg/cuda/common.hpp +++ b/viennacl/linalg/cuda/common.hpp @@ -103,14 +103,14 @@ const NumericT * cuda_arg(matrix_base const & obj) /** @brief Convenience 
helper function for extracting the CUDA handle from a generic memory handle. Non-const version. */ template -ReturnT * cuda_arg(viennacl::backend::mem_handle & h) +ReturnT * cuda_arg(viennacl::backend::mem_handle<> & h) { return reinterpret_cast(h.cuda_handle().get()); } /** @brief Convenience helper function for extracting the CUDA handle from a generic memory handle. Const-version. */ template -ReturnT const * cuda_arg(viennacl::backend::mem_handle const & h) +ReturnT const * cuda_arg(viennacl::backend::mem_handle<> const & h) { return reinterpret_cast(h.cuda_handle().get()); } diff --git a/viennacl/linalg/cuda/ilu_operations.hpp b/viennacl/linalg/cuda/ilu_operations.hpp index 302a73cc..f15eb944 100644 --- a/viennacl/linalg/cuda/ilu_operations.hpp +++ b/viennacl/linalg/cuda/ilu_operations.hpp @@ -285,7 +285,7 @@ template void icc_chow_patel_sweep(compressed_matrix & L, vector const & aij_L) { - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); @@ -578,11 +578,11 @@ void ilu_chow_patel_sweep(compressed_matrix & L, compressed_matrix & U_trans, vector const & aij_U_trans) { - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - viennacl::backend::mem_handle U_backup; + viennacl::backend::mem_handle<> U_backup; viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans)); viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size()); diff --git a/viennacl/linalg/cuda/misc_operations.hpp b/viennacl/linalg/cuda/misc_operations.hpp index 4821f5b4..7b834904 100644 --- 
a/viennacl/linalg/cuda/misc_operations.hpp +++ b/viennacl/linalg/cuda/misc_operations.hpp @@ -66,10 +66,10 @@ __global__ void level_scheduling_substitute_kernel( template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/cuda/sparse_matrix_operations.hpp b/viennacl/linalg/cuda/sparse_matrix_operations.hpp index 51d99e13..467f8772 100644 --- a/viennacl/linalg/cuda/sparse_matrix_operations.hpp +++ b/viennacl/linalg/cuda/sparse_matrix_operations.hpp @@ -970,7 +970,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & /* L_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_lower_tag) @@ -989,7 +989,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & U_diagonal, vector_base & vec, viennacl::linalg::upper_tag) diff --git a/viennacl/linalg/cuda/vector_operations.hpp b/viennacl/linalg/cuda/vector_operations.hpp index 61274b75..026a3a98 100644 --- a/viennacl/linalg/cuda/vector_operations.hpp +++ b/viennacl/linalg/cuda/vector_operations.hpp @@ -2753,7 +2753,7 @@ vcl_size_t index_norm_inf(vector_base const & vec1) 
{ typedef NumericT value_type; - viennacl::backend::mem_handle h; + viennacl::backend::mem_handle<> h; viennacl::backend::memory_create(h, sizeof(unsigned int), viennacl::traits::context(vec1)); index_norm_inf_kernel<<<1, 128>>>(viennacl::cuda_arg(vec1), @@ -2965,7 +2965,7 @@ namespace detail vcl_size_t block_num = 128; vcl_size_t threads_per_block = 128; - viennacl::backend::mem_handle cuda_carries; + viennacl::backend::mem_handle<> cuda_carries; viennacl::backend::memory_create(cuda_carries, sizeof(NumericT)*block_num, viennacl::traits::context(input)); // First step: Scan within each thread group and write carries diff --git a/viennacl/linalg/mixed_precision_cg.hpp b/viennacl/linalg/mixed_precision_cg.hpp index 78254b34..fa4fc7a5 100644 --- a/viennacl/linalg/mixed_precision_cg.hpp +++ b/viennacl/linalg/mixed_precision_cg.hpp @@ -128,10 +128,10 @@ namespace viennacl // transfer matrix to single precision: viennacl::compressed_matrix matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs)); - viennacl::backend::memory_copy(matrix.handle1(), const_cast(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() ); - viennacl::backend::memory_copy(matrix.handle2(), const_cast(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() ); + viennacl::backend::memory_copy(matrix.handle1(), const_cast &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() ); + viennacl::backend::memory_copy(matrix.handle2(), const_cast &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() ); - viennacl::vector_base matrix_elements_high_precision(const_cast(matrix.handle()), matrix.nnz(), 0, 1); + viennacl::vector_base matrix_elements_high_precision(const_cast &>(matrix.handle()), matrix.nnz(), 0, 1); viennacl::vector_base matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1); matrix_elements_low_precision = 
matrix_elements_high_precision; matrix_low_precision.generate_row_block_information(); diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index c8a97e65..3c75d03f 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -427,11 +427,11 @@ namespace viennacl // Helper macro for generating unary element-wise operations such as element_exp(), element_sin(), etc. without unnecessary code duplication */ #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \ - template \ - viennacl::vector_expression, const vector_base, op_element_unary > \ - element_##funcname(vector_base const & v) \ + template \ + viennacl::vector_expression, const vector_base, op_element_unary > \ + element_##funcname(vector_base const & v) \ { \ - return viennacl::vector_expression, const vector_base, op_element_unary >(v, v); \ + return viennacl::vector_expression, const vector_base, op_element_unary >(v, v); \ } \ template \ viennacl::vector_expression, \ diff --git a/viennacl/sliced_ell_matrix.hpp b/viennacl/sliced_ell_matrix.hpp index f66b0d4d..3c7ab74e 100644 --- a/viennacl/sliced_ell_matrix.hpp +++ b/viennacl/sliced_ell_matrix.hpp @@ -46,7 +46,7 @@ template class sliced_ell_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/toeplitz_matrix.hpp b/viennacl/toeplitz_matrix.hpp index 1891a6aa..00b998ec 100644 --- a/viennacl/toeplitz_matrix.hpp +++ b/viennacl/toeplitz_matrix.hpp @@ -43,7 +43,7 @@ template class toeplitz_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/traits/size.hpp b/viennacl/traits/size.hpp index 2e2e0d76..3f3e1769 100644 --- a/viennacl/traits/size.hpp +++ b/viennacl/traits/size.hpp @@ -384,8 +384,8 
@@ vcl_size_t size(vector_expression -vcl_size_t internal_size(vector_base const & vec) +template +vcl_size_t internal_size(vector_base const & vec) { return vec.internal_size(); } diff --git a/viennacl/vandermonde_matrix.hpp b/viennacl/vandermonde_matrix.hpp index d3f3a66c..6a9eff15 100644 --- a/viennacl/vandermonde_matrix.hpp +++ b/viennacl/vandermonde_matrix.hpp @@ -44,7 +44,7 @@ template class vandermonde_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** -- GitLab From 4447bf4c27466d1bc66cc9ba9b6df994cbe22aa9 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 16:45:59 -0600 Subject: [PATCH 43/46] adds license, minor docs --- viennacl/ocl/context.hpp | 4 ++-- viennacl/ocl/mempool/mempool.hpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 796750f1..19c21064 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -325,13 +325,13 @@ public: return mem; } - /// [KK]: TODOTODOTODOTODO Documentation + /** @brief Decrements the reference count of the memory in the memory pool **/ void decrement_mem_ref_counter(cl_mem p, vcl_size_t s) const { get_mempool()->decrement_ref_counter(p, s); } - + /** @brief Increments the reference count of the memory in the memory pool **/ void increment_mem_ref_counter(cl_mem p, vcl_size_t s) const { get_mempool()->increment_ref_counter(p, s); diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp index 51b365f5..12abdc62 100644 --- a/viennacl/ocl/mempool/mempool.hpp +++ b/viennacl/ocl/mempool/mempool.hpp @@ -1,3 +1,31 @@ +// Abstract memory pool implementation +// +// Copyright (C) 2009-17 Andreas Kloeckner +// 2018-19 Kaushik Kulkarni +// +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and
associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + #ifndef VIENNACL_OCL_MEMPOOL_HPP_ #define VIENNACL_OCL_MEMPOOL_HPP_ -- GitLab From 6394e0ea71463795009f4dba71c077f7c511a199 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 17:59:07 -0600 Subject: [PATCH 44/46] include one allocator for one device --- viennacl/ocl/context.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 19c21064..1b6b96fe 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -390,18 +390,17 @@ public: queues_[dev].push_back(viennacl::ocl::command_queue(temp)); - // TODO: Need figure out why this is giving an error. - //if(queues_.find(dev) == queues_.end()) - //{ - // did not find a queue for the present device, need to allot an - // allocator. 
+ // register the allocator for the device + if(allocators_.find(dev) == allocators_.end()) + { + // did not find an allocator for the present device => allot one allocators_[dev] = tools::shared_ptr(new cl_immediate_allocator(this, &(queues_[dev][0]), CL_MEM_READ_WRITE)); mempools_[dev] = tools::shared_ptr> (new memory_pool(*allocators_[dev])); - //} + } } /** @brief Adds a queue for the given device to the context */ -- GitLab From f579ccdc8f4b2877e78ee5cf14ae44c77643bc77 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 28 Dec 2018 18:10:25 -0600 Subject: [PATCH 45/46] asserts that the memory pool picks up something --- viennacl/ocl/context.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index 1b6b96fe..52f638f0 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -422,6 +422,7 @@ public: { typedef std::map< cl_device_id, tools::shared_ptr> > MempoolContainer; MempoolContainer::const_iterator it = mempools_.find(devices_[current_device_id_].id()); + assert (it != mempools_.end()&&bool("Did not find a memory pool.")); return it->second; } -- GitLab From eb66115ba74ccd10ff7a360fdc1eb5140f65bbb4 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 1 Jan 2019 13:46:35 -0600 Subject: [PATCH 46/46] adds the ability to output the result of VecMDot to pooled vector --- viennacl/detail/vector_def.hpp | 8 ++++---- viennacl/linalg/detail/op_executor.hpp | 16 ++++++++-------- viennacl/vector.hpp | 22 +++++++++++----------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 92ed546c..25a0bf6f 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -209,13 +209,13 @@ public: //read-write access to an element of the vector /** @brief Read-write access to a single element of the vector */ - entry_proxy operator()(size_type index); + entry_proxy operator()(size_type index); /**
@brief Read-write access to a single element of the vector */ - entry_proxy operator[](size_type index); + entry_proxy operator[](size_type index); /** @brief Read access to a single element of the vector */ - const_entry_proxy operator()(size_type index) const; + const_entry_proxy operator()(size_type index) const; /** @brief Read access to a single element of the vector */ - const_entry_proxy operator[](size_type index) const; + const_entry_proxy operator[](size_type index) const; self_type & operator += (const self_type & vec); self_type & operator -= (const self_type & vec); diff --git a/viennacl/linalg/detail/op_executor.hpp b/viennacl/linalg/detail/op_executor.hpp index bd49b3bd..68c8a867 100644 --- a/viennacl/linalg/detail/op_executor.hpp +++ b/viennacl/linalg/detail/op_executor.hpp @@ -32,27 +32,27 @@ namespace linalg namespace detail { -template -bool op_aliasing(vector_base const & /*lhs*/, B const & /*b*/) +template +bool op_aliasing(vector_base const & /*lhs*/, B const & /*b*/) { return false; } -template -bool op_aliasing(vector_base const & lhs, vector_base const & b) +template +bool op_aliasing(vector_base const & lhs, vector_base const & b) { return lhs.handle() == b.handle(); } -template -bool op_aliasing(vector_base const & lhs, vector_expression const & rhs) +template +bool op_aliasing(vector_base const & lhs, vector_expression const & rhs) { return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); } -template -bool op_aliasing(matrix_base const & /*lhs*/, B const & /*b*/) +template +bool op_aliasing(matrix_base const & /*lhs*/, B const & /*b*/) { return false; } diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index f0c4f5a1..c735044f 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -561,39 +561,39 @@ return *this; //read-write access to an element of the vector template -entry_proxy vector_base::operator()(size_type index) +entry_proxy vector_base::operator()(size_type index) { assert( (size() > 0) && bool("Cannot 
apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); -return entry_proxy(start_ + stride_ * index, elements_); +return entry_proxy(start_ + stride_ * index, elements_); } template -entry_proxy vector_base::operator[](size_type index) +entry_proxy vector_base::operator[](size_type index) { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); -return entry_proxy(start_ + stride_ * index, elements_); +return entry_proxy(start_ + stride_ * index, elements_); } template -const_entry_proxy vector_base::operator()(size_type index) const +const_entry_proxy vector_base::operator()(size_type index) const { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); -return const_entry_proxy(start_ + stride_ * index, elements_); +return const_entry_proxy(start_ + stride_ * index, elements_); } template -const_entry_proxy vector_base::operator[](size_type index) const +const_entry_proxy vector_base::operator[](size_type index) const { assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!")); assert( index < size() && bool("Index out of bounds!") ); -return const_entry_proxy(start_ + stride_ * index, elements_); +return const_entry_proxy(start_ + stride_ * index, elements_); } //////////////////////////// Read-write access to an element of the vector end /////////////////// @@ -2058,10 +2058,10 @@ namespace detail }; // x = inner_prod(z, {y0, y1, ...}) - template - struct op_executor, op_assign, vector_expression, const vector_tuple, op_inner_prod> > + template + struct op_executor, op_assign, vector_expression, const vector_tuple, op_inner_prod> > { - static void apply(vector_base & lhs, vector_expression, const vector_tuple, op_inner_prod> const & rhs) + static void apply(vector_base & lhs, vector_expression, const vector_tuple, 
op_inner_prod> const & rhs) { viennacl::linalg::inner_prod_impl(rhs.lhs(), rhs.rhs(), lhs); } -- GitLab