diff --git a/.gitignore b/.gitignore
index b86d66515005120e7a2f4d6c5b6d10577cdf3b40..a0f9b24f2eeef75a2e4115c352eccefee957a077 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 build
 .*.swp
+tags
diff --git a/libviennacl/src/blas1.cpp b/libviennacl/src/blas1.cpp
index a7319d5704535976b277dff95196940769b0ccbe..7a69fd6b48068f2f299dd224786a7dcb977a3270 100644
--- a/libviennacl/src/blas1.cpp
+++ b/libviennacl/src/blas1.cpp
@@ -39,7 +39,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLiamax(ViennaCLInt *index, ViennaCLVector x)
 {
-  viennacl::backend::mem_handle v1_handle;
+  viennacl::backend::mem_handle<> v1_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -79,7 +79,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLasum(ViennaCLHostScalar *alpha
   if ((*alpha)->precision != x->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
+  viennacl::backend::mem_handle<> v1_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -121,8 +121,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLaxpy(ViennaCLHostScalar alpha,
   if (x->precision != y->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
-  viennacl::backend::mem_handle v2_handle;
+  viennacl::backend::mem_handle<> v1_handle;
+  viennacl::backend::mem_handle<> v2_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -165,8 +165,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLcopy(ViennaCLVector x, ViennaC
   if (x->precision != y->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
-  viennacl::backend::mem_handle v2_handle;
+  viennacl::backend::mem_handle<> v1_handle;
+  viennacl::backend::mem_handle<> v2_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -211,8 +211,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLdot(ViennaCLHostScalar *alpha,
   if (x->precision != y->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
-  viennacl::backend::mem_handle v2_handle;
+  viennacl::backend::mem_handle<> v1_handle;
+  viennacl::backend::mem_handle<> v2_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -254,7 +254,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLnrm2(ViennaCLHostScalar *alpha
   if ((*alpha)->precision != x->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
+  viennacl::backend::mem_handle<> v1_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -300,8 +300,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLrot(ViennaCLVector x, Vien
   if (x->precision != y->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
-  viennacl::backend::mem_handle v2_handle;
+  viennacl::backend::mem_handle<> v1_handle;
+  viennacl::backend::mem_handle<> v2_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -343,7 +343,7 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLscal(ViennaCLHostScalar alpha,
   if (alpha->precision != x->precision)
     return ViennaCLGenericFailure;
-  viennacl::backend::mem_handle v1_handle;
+  viennacl::backend::mem_handle<> v1_handle;
   if (init_vector(v1_handle, x) != ViennaCLSuccess)
     return ViennaCLGenericFailure;
@@ -381,8 +381,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLswap(ViennaCLVector x, ViennaC
   if (x->precision != 
y->precision) return ViennaCLGenericFailure; - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/blas2.cpp b/libviennacl/src/blas2.cpp index bc2c0952a0963f34f1c0e18dfa80424341a6a64b..d5014d2864ec612747142b31364852a2f43f9a4f 100644 --- a/libviennacl/src/blas2.cpp +++ b/libviennacl/src/blas2.cpp @@ -35,9 +35,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLVector x, ViennaCLHostScalar beta, ViennaCLVector y) { - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -100,8 +100,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemv(ViennaCLHostScalar alpha, VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaCLVector x, ViennaCLUplo uplo) { - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -176,9 +176,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsv(ViennaCLMatrix A, ViennaC VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLger(ViennaCLHostScalar alpha, ViennaCLVector x, ViennaCLVector y, ViennaCLMatrix A) { - viennacl::backend::mem_handle v1_handle; - viennacl::backend::mem_handle v2_handle; - viennacl::backend::mem_handle A_handle; + viennacl::backend::mem_handle<> v1_handle; + viennacl::backend::mem_handle<> v2_handle; + viennacl::backend::mem_handle<> A_handle; if (init_vector(v1_handle, x) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/blas3.cpp b/libviennacl/src/blas3.cpp index bb6e03eb5fe3edb97569066cd6e1802bed97d9f4..f61c00b02af9d0bf2bb1bec4f73a3412e38edb64 100644 --- a/libviennacl/src/blas3.cpp +++ b/libviennacl/src/blas3.cpp @@ -34,9 +34,9 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, ViennaCLMatrix A, ViennaCLMatrix B, ViennaCLHostScalar beta, ViennaCLMatrix C) { - viennacl::backend::mem_handle A_handle; - viennacl::backend::mem_handle B_handle; - viennacl::backend::mem_handle C_handle; + viennacl::backend::mem_handle<> A_handle; + viennacl::backend::mem_handle<> B_handle; + viennacl::backend::mem_handle<> C_handle; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; @@ -117,8 +117,8 @@ VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLgemm(ViennaCLHostScalar alpha, VIENNACL_EXPORTED_FUNCTION ViennaCLStatus ViennaCLtrsm(ViennaCLMatrix A, ViennaCLUplo uplo, ViennaCLDiag diag, ViennaCLMatrix B) { - viennacl::backend::mem_handle A_handle; - viennacl::backend::mem_handle B_handle; + viennacl::backend::mem_handle<> A_handle; + viennacl::backend::mem_handle<> B_handle; if (init_matrix(A_handle, A) != ViennaCLSuccess) return ViennaCLGenericFailure; diff --git a/libviennacl/src/init_matrix.hpp b/libviennacl/src/init_matrix.hpp index e463e88008dfe7a26e863cd2ab6422441ef7089e..c461c57e9fdbd559c0e73f06be9d79b8363b3d24 100644 --- a/libviennacl/src/init_matrix.hpp +++ 
b/libviennacl/src/init_matrix.hpp @@ -20,7 +20,7 @@ -static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_CUDA h.switch_active_handle_id(viennacl::CUDA_MEMORY); @@ -41,7 +41,7 @@ static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, Vienna #endif } -static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { #ifdef VIENNACL_WITH_OPENCL h.switch_active_handle_id(viennacl::OPENCL_MEMORY); @@ -63,7 +63,7 @@ static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, Vien } -static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { h.switch_active_handle_id(viennacl::MAIN_MEMORY); h.ram_handle().reset(A->host_mem); @@ -79,7 +79,7 @@ static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, Vienna } -static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +static ViennaCLStatus init_matrix(viennacl::backend::mem_handle<> & h, ViennaCLMatrix A) { switch (A->backend->backend_type) { diff --git a/libviennacl/src/init_vector.hpp b/libviennacl/src/init_vector.hpp index 8be00d730d94d9dd43d901b08ece0d55a371da9e..2ae9ebcc1d6497495d742f19356a05f59d00b622 100644 --- a/libviennacl/src/init_vector.hpp +++ b/libviennacl/src/init_vector.hpp @@ -20,7 +20,7 @@ -static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { #ifdef VIENNACL_WITH_CUDA h.switch_active_handle_id(viennacl::CUDA_MEMORY); @@ -41,7 +41,7 @@ static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, Vienna #endif } -static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { #ifdef VIENNACL_WITH_OPENCL h.switch_active_handle_id(viennacl::OPENCL_MEMORY); @@ -63,7 +63,7 @@ static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, Vien } -static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { h.switch_active_handle_id(viennacl::MAIN_MEMORY); h.ram_handle().reset(x->host_mem); @@ -79,7 +79,7 @@ static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, Vienna } -static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +static ViennaCLStatus init_vector(viennacl::backend::mem_handle<> & h, ViennaCLVector x) { switch (x->backend->backend_type) { diff --git a/viennacl/backend/mem_handle.hpp b/viennacl/backend/mem_handle.hpp index 37c680ba9c3942e2b1b07df9c173eb04dc691b2c..ed8aa8ab809d44a1007a691c36d784655293f476 100644 --- a/viennacl/backend/mem_handle.hpp +++ b/viennacl/backend/mem_handle.hpp @@ -86,6 +86,7 @@ inline memory_types default_memory_type(memory_types new_memory_type) { return d * Instead, this class collects all the necessary conditional compilations. 
 *
 */
+template
 class mem_handle
 {
 public:
@@ -102,9 +103,9 @@ public:
 #ifdef VIENNACL_WITH_OPENCL
   /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
-  viennacl::ocl::handle & opencl_handle() { return opencl_handle_; }
+  OCLHandle & opencl_handle() { return opencl_handle_; }
   /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */
-  viennacl::ocl::handle const & opencl_handle() const { return opencl_handle_; }
+  OCLHandle const & opencl_handle() const { return opencl_handle_; }
 #endif
 #ifdef VIENNACL_WITH_CUDA
@@ -236,7 +237,7 @@ private:
   memory_types active_handle_;
   ram_handle_type ram_handle_;
 #ifdef VIENNACL_WITH_OPENCL
-  viennacl::ocl::handle opencl_handle_;
+  OCLHandle opencl_handle_;
 #endif
 #ifdef VIENNACL_WITH_CUDA
   cuda_handle_type cuda_handle_;
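A note on the hunk above: the template parameter list of mem_handle is not visible here (angle-bracket contents were lost), so the following is a hedged sketch of what the new class header amounts to. The default argument viennacl::ocl::handle<cl_mem> is an assumption, inferred from the OCLHandle uses above, the mem_handle<> defaults throughout this patch, and the new #include "CL/cl.h" in viennacl/forwards.h; it is not confirmed by the patch text itself.

// Hedged sketch, not part of the patch.
template<class OCLHandle = viennacl::ocl::handle<cl_mem> >   // default is an assumption
class mem_handle
{
public:
  // ... unchanged members ...
#ifdef VIENNACL_WITH_OPENCL
  OCLHandle       & opencl_handle()       { return opencl_handle_; }
  OCLHandle const & opencl_handle() const { return opencl_handle_; }
#endif
private:
#ifdef VIENNACL_WITH_OPENCL
  OCLHandle opencl_handle_;   // previously a plain viennacl::ocl::handle of cl_mem
#endif
};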
diff --git a/viennacl/backend/memory.hpp b/viennacl/backend/memory.hpp
index 1b1c6c5303a89f48db363393a12ec7dc8723d677..9513bbd84220e62b329255dc0b14cb8ecf0fdac2 100644
--- a/viennacl/backend/memory.hpp
+++ b/viennacl/backend/memory.hpp
@@ -84,7 +84,7 @@ namespace backend
   * @param host_ptr       Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
   *
   */
-  inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
+  inline void memory_create(mem_handle<> & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
   {
     if (size_in_bytes > 0)
     {
@@ -101,6 +101,7 @@ namespace backend
       case OPENCL_MEMORY:
         handle.opencl_handle().context(ctx.opencl_context());
         handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr);
+        handle.raw_size(size_in_bytes);
         break;
 #endif
@@ -118,6 +119,37 @@ namespace backend
     }
   }
+  // Pooled version of the above function!
+  inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL)
+  {
+    if (size_in_bytes > 0)
+    {
+      if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
+        handle.switch_active_handle_id(ctx.memory_type());
+
+      switch (handle.get_active_handle_id())
+      {
+#ifdef VIENNACL_WITH_OPENCL
+      case OPENCL_MEMORY:
+        handle.opencl_handle().context(ctx.opencl_context());
+        // If using memory pool then use a pooled handle
+        handle.opencl_handle() =
+          viennacl::ocl::pooled_clmem_handle(
+            opencl::pooled_memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr),
+            ctx.opencl_context(),
+            size_in_bytes);
+
+        handle.raw_size(size_in_bytes);
+        break;
+#endif
+      case MEMORY_NOT_INITIALIZED:
+        throw memory_exception("not initialised!");
+      default:
+        throw memory_exception("Pooled handle only available with OpenCL memory for now!");
+      }
+    }
+  }
+
   /*
   inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
   {
@@ -137,8 +169,9 @@ namespace backend
   * @param dst_offset     Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes)
   * @param bytes_to_copy  Number of bytes to be copied
   */
-  inline void memory_copy(mem_handle const & src_buffer,
-                          mem_handle & dst_buffer,
+  template >
+  inline void memory_copy(mem_handle const & src_buffer,
+                          mem_handle & dst_buffer,
                           vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
@@ -174,8 +207,9 @@ namespace backend
   /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer.
   * The uninitialized buffer just copies the raw handle.
   */
-  inline void memory_shallow_copy(mem_handle const & src_buffer,
-                                  mem_handle & dst_buffer)
+  template >
+  inline void memory_shallow_copy(mem_handle const & src_buffer,
+                                  mem_handle & dst_buffer)
   {
     assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!"));
@@ -217,7 +251,8 @@ namespace backend
   * @param ptr            Pointer to the first byte to be written
   * @param async          Whether the operation should be asynchronous
   */
-  inline void memory_write(mem_handle & dst_buffer,
+  template >
+  inline void memory_write(mem_handle & dst_buffer,
                            vcl_size_t dst_offset,
                            vcl_size_t bytes_to_write,
                            const void * ptr,
                            bool async = false)
@@ -258,7 +293,8 @@ namespace backend
   * @param ptr            Location in main RAM where to read data should be written to
   * @param async          Whether the operation should be asynchronous
   */
-  inline void memory_read(mem_handle const & src_buffer,
+  template
+  inline void memory_read(mem_handle const & src_buffer,
                           vcl_size_t src_offset,
                           vcl_size_t bytes_to_read,
                           void * ptr,
                           bool async = false)
@@ -364,8 +400,8 @@ namespace backend
   /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */
-  template
-  void switch_memory_context(mem_handle & handle, viennacl::context new_ctx)
+  template>
+  void switch_memory_context(mem_handle & handle, viennacl::context new_ctx)
   {
     if (handle.get_active_handle_id() == new_ctx.memory_type())
       return;
@@ -466,8 +502,8 @@ namespace backend
   /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. */
-  template
-  void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
+  template>
+  void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
   {
     if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
       handle_dst.switch_active_handle_id(default_memory_type());
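The two memory_create() overloads above differ only in the handle type they take, so overload resolution picks the pooled allocation path automatically. A minimal usage sketch follows (not part of the patch); it assumes the pooled overload's stripped parameter type is mem_handle taking viennacl::ocl::pooled_clmem_handle, and the function name and buffer size here are illustrative only.

// Hedged sketch under the assumptions stated above; requires VIENNACL_WITH_OPENCL.
#include "viennacl/backend/memory.hpp"

void create_buffers(viennacl::context const & ctx)
{
  // Default handle type: plain (non-pooled) buffer, same behaviour as before this patch.
  viennacl::backend::mem_handle<> plain;
  viennacl::backend::memory_create(plain, 1024, ctx);

  // Pooled OpenCL buffer: routed through opencl::pooled_memory_create();
  // the pooled overload throws memory_exception for non-OpenCL memory types.
  viennacl::backend::mem_handle<viennacl::ocl::pooled_clmem_handle> pooled;
  viennacl::backend::memory_create(pooled, 1024, ctx);
}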
diff --git a/viennacl/backend/opencl.hpp b/viennacl/backend/opencl.hpp
index a8be55a7881b45972fcf0065105093696d5c31ec..10897fc13b34292fe5b86fa59687bf02c410c8ab 100644
--- a/viennacl/backend/opencl.hpp
+++ b/viennacl/backend/opencl.hpp
@@ -58,6 +58,21 @@ inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_
   return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr));
 }
+
+/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer.
+ *
+ * @param size_in_bytes   Number of bytes to allocate
+ * @param host_ptr        Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data.
+ * @param ctx             Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host)
+ *
+ */
+inline cl_mem pooled_memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL)
+{
+  //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl;
+  return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast(size_in_bytes), const_cast(host_ptr), true);
+}
+
+
 /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context.
  *
  * @param src_buffer   A smart pointer to the begin of an allocated OpenCL buffer
diff --git a/viennacl/backend/util.hpp b/viennacl/backend/util.hpp
index 9aaeb2e95b7e6783925537721e405d62545d4b3a..b484bdea47a44ba23480d9f5072495eb26cf29f1 100644
--- a/viennacl/backend/util.hpp
+++ b/viennacl/backend/util.hpp
@@ -97,7 +97,8 @@ class typesafe_host_array
 public:
   explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {}
-  explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
+  template >
+  explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num)
   {
     resize(handle, num);
   }
@@ -122,7 +123,8 @@ public:
   //
   /** @brief Resize without initializing the new memory */
-  void raw_resize(mem_handle const & /*handle*/, vcl_size_t num)
+  template >
+  void raw_resize(mem_handle const & /*handle*/, vcl_size_t num)
   {
     buffer_size_ = sizeof(cpu_type) * num;
@@ -135,7 +137,8 @@ public:
   }
   /** @brief Resize including initialization of new memory (cf. std::vector<>) */
-  void resize(mem_handle const & handle, vcl_size_t num)
+  template >
+  void resize(mem_handle const & handle, vcl_size_t num)
   {
     raw_resize(handle, num);
@@ -171,7 +174,8 @@ class typesafe_host_array
 public:
   explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? 
true : false), bytes_buffer_(NULL), buffer_size_(0) {} - explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) + template > + explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) { resize(handle, num); } @@ -220,7 +224,8 @@ public: // /** @brief Resize without initializing the new memory */ - void raw_resize(mem_handle const & handle, vcl_size_t num) + template > + void raw_resize(mem_handle const & handle, vcl_size_t num) { buffer_size_ = sizeof(cpu_type) * num; (void)handle; //silence unused variable warning if compiled without OpenCL support @@ -246,7 +251,8 @@ public: } /** @brief Resize including initialization of new memory (cf. std::vector<>) */ - void resize(mem_handle const & handle, vcl_size_t num) + template > + void resize(mem_handle const & handle, vcl_size_t num) { raw_resize(handle, num); diff --git a/viennacl/circulant_matrix.hpp b/viennacl/circulant_matrix.hpp index 1ee13d50a9f06d4e32980682b66aeca67bed6dcf..a19fa80cca06968612a6f43edca7a87a8aa862d2 100644 --- a/viennacl/circulant_matrix.hpp +++ b/viennacl/circulant_matrix.hpp @@ -41,7 +41,7 @@ template class circulant_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/compressed_compressed_matrix.hpp b/viennacl/compressed_compressed_matrix.hpp index f1719a2c3001b8b8eed7bd3b53406658e89bc033..ff3e5e68ed3d77328b331f402e977aa4d9ce4d70 100644 --- a/viennacl/compressed_compressed_matrix.hpp +++ b/viennacl/compressed_compressed_matrix.hpp @@ -265,7 +265,7 @@ template class compressed_compressed_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/compressed_matrix.hpp b/viennacl/compressed_matrix.hpp index cdb12f44c407c0c8bed8fed5847dacf88f16c819..92fb25c5ce18352157c51e49137b4f792f9b4e83 100644 --- a/viennacl/compressed_matrix.hpp +++ b/viennacl/compressed_matrix.hpp @@ -630,7 +630,7 @@ class compressed_matrix { typedef compressed_matrix self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/coordinate_matrix.hpp b/viennacl/coordinate_matrix.hpp index 2a24a4edafaff7689b022347ceba5780f7dd7d94..3a9eb074484e2a48a56e68e33a0f1c4859347e35 100644 --- a/viennacl/coordinate_matrix.hpp +++ b/viennacl/coordinate_matrix.hpp @@ -186,7 +186,7 @@ template class coordinate_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/detail/matrix_def.hpp b/viennacl/detail/matrix_def.hpp index c13ef01fe088a0949a79d81b21a835d867ba4f94..b85afe38d5d6a5a0a9969b5a59ef7b8fb05849ec 100644 --- a/viennacl/detail/matrix_def.hpp +++ b/viennacl/detail/matrix_def.hpp @@ -111,7 +111,7 @@ public: typedef NumericT cpu_value_type; typedef SizeT size_type; typedef DistanceT difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; /** @brief The default constructor. 
Does not allocate any memory. */ explicit matrix_base(): size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0), row_major_fixed_(false), row_major_(true) {} @@ -129,7 +129,7 @@ public: explicit matrix_base(size_type rows, size_type columns, bool is_row_major, viennacl::context ctx = viennacl::context()); /** @brief Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_stride */ - explicit matrix_base(viennacl::backend::mem_handle & h, + explicit matrix_base(handle_type & h, size_type mat_size1, size_type mat_start1, size_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, size_type mat_stride2, size_type mat_internal_size2, bool is_row_major): size1_(mat_size1), size2_(mat_size2), @@ -249,7 +249,7 @@ public: void switch_memory_context(viennacl::context new_ctx) { viennacl::backend::switch_memory_context(elements_, new_ctx); } protected: - void set_handle(viennacl::backend::mem_handle const & h); + void set_handle(viennacl::backend::mem_handle<> const & h); void resize(size_type rows, size_type columns, bool preserve = true); private: size_type size1_; diff --git a/viennacl/detail/vector_def.hpp b/viennacl/detail/vector_def.hpp index 4624b762fa2cdd46214043d29cfe500d19e79516..25a0bf6f48f4845d8a5213dc37145b4bae0f0c0c 100644 --- a/viennacl/detail/vector_def.hpp +++ b/viennacl/detail/vector_def.hpp @@ -100,19 +100,19 @@ struct zero_vector : public scalar_vector * * @tparam NumericT The floating point type, either 'float' or 'double' */ -template +template class vector_base { - typedef vector_base self_type; + typedef vector_base self_type; public: typedef scalar value_type; typedef NumericT cpu_value_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; typedef SizeT size_type; typedef DistanceT difference_type; - typedef const_vector_iterator const_iterator; - typedef vector_iterator iterator; + typedef const_vector_iterator const_iterator; + typedef vector_iterator iterator; /** @brief Returns the length of the vector (cf. 
std::vector) */ size_type size() const { return size_; } @@ -141,7 +141,7 @@ public: * @param vec_start The offset from the beginning of the buffer identified by 'h' * @param vec_stride Increment between two elements in the original buffer (in multiples of NumericT) */ - explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); + explicit vector_base(viennacl::backend::mem_handle & h, size_type vec_size, size_type vec_start, size_type vec_stride); /** @brief Creates a vector and allocates the necessary memory */ explicit vector_base(size_type vec_size, viennacl::context ctx = viennacl::context()); @@ -209,13 +209,13 @@ public: //read-write access to an element of the vector /** @brief Read-write access to a single element of the vector */ - entry_proxy operator()(size_type index); + entry_proxy operator()(size_type index); /** @brief Read-write access to a single element of the vector */ - entry_proxy operator[](size_type index); + entry_proxy operator[](size_type index); /** @brief Read access to a single element of the vector */ - const_entry_proxy operator()(size_type index) const; + const_entry_proxy operator()(size_type index) const; /** @brief Read access to a single element of the vector */ - const_entry_proxy operator[](size_type index) const; + const_entry_proxy operator[](size_type index) const; self_type & operator += (const self_type & vec); self_type & operator -= (const self_type & vec); @@ -302,7 +302,7 @@ public: protected: - void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } + void set_handle(viennacl::backend::mem_handle const & h) { elements_ = h; } /** @brief Swaps the handles of two vectors by swapping the OpenCL handles only, no data copy */ self_type & fast_swap(self_type & other); diff --git a/viennacl/device_specific/forwards.h b/viennacl/device_specific/forwards.h index 11368bb4f9983e08a91d17e316738d3552403b33..9c635e0c80000f939ad0ac427d5f7a845c24f236 100644 --- a/viennacl/device_specific/forwards.h +++ b/viennacl/device_specific/forwards.h @@ -217,16 +217,18 @@ class symbolic_binder { public: virtual ~symbolic_binder(){ } - virtual bool bind(viennacl::backend::mem_handle const * ph) = 0; - virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0; + virtual bool bind(viennacl::backend::mem_handle<> const * ph) = 0; + virtual unsigned int get(viennacl::backend::mem_handle<> const * ph) = 0; }; class bind_to_handle : public symbolic_binder { public: bind_to_handle() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } - unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph) ? current_arg_++ : at(memory, (void*)ph); } + + bool bind(viennacl::backend::mem_handle<> const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } + + unsigned int get(viennacl::backend::mem_handle<> const * ph){ return bind(ph) ? 
current_arg_++ : at(memory, (void*)ph); } private: unsigned int current_arg_; std::map memory; @@ -236,8 +238,10 @@ class bind_all_unique : public symbolic_binder { public: bind_all_unique() : current_arg_(0){ } - bool bind(viennacl::backend::mem_handle const *) {return true; } - unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } + bool bind(viennacl::backend::mem_handle const *) {return true; } + bool bind(viennacl::backend::mem_handle<> const *) {return true; } + unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } + unsigned int get(viennacl::backend::mem_handle<> const *){ return current_arg_++; } private: unsigned int current_arg_; std::map memory; diff --git a/viennacl/ell_matrix.hpp b/viennacl/ell_matrix.hpp index 3c3a4282bbaae0312c73eeb19fee59097f0b8b22..fc584219de3d69617b68027f75fd679db5d72c22 100644 --- a/viennacl/ell_matrix.hpp +++ b/viennacl/ell_matrix.hpp @@ -53,7 +53,7 @@ template handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/forwards.h b/viennacl/forwards.h index 092b6e5159ac9455b2e12f4b20f48e3801a39363..8e8a4e56a526df83e7410bea093a0c6e448dffd6 100644 --- a/viennacl/forwards.h +++ b/viennacl/forwards.h @@ -69,6 +69,8 @@ #include "viennacl/meta/enable_if.hpp" #include "viennacl/version.hpp" +#include "CL/cl.h" + /** @brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. */ namespace viennacl { @@ -246,26 +248,34 @@ namespace viennacl /** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */ struct op_flip_sign {}; + /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ + namespace ocl { + template + class handle; + + class pooled_clmem_handle; + } + //forward declaration of basic types: - template + template> class scalar; template class scalar_expression; - template + template> class entry_proxy; - template + template> class const_entry_proxy; template class vector_expression; - template + template> class vector_iterator; - template + template> class const_vector_iterator; template @@ -283,13 +293,13 @@ namespace viennacl template struct scalar_vector; - template + template, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> class vector_base; - template + template> class vector; - template + template> class vector_tuple; //the following forwards are needed for GMRES @@ -308,15 +318,15 @@ namespace viennacl const_vector_iterator const & gpu_src_end, const_vector_iterator gpu_dest_begin); - template - void fast_copy(const const_vector_iterator & gpu_begin, - const const_vector_iterator & gpu_end, + template + void fast_copy(const const_vector_iterator & gpu_begin, + const const_vector_iterator & gpu_end, CPU_ITERATOR cpu_begin ); - template + template void fast_copy(CPU_ITERATOR const & cpu_begin, CPU_ITERATOR const & cpu_end, - vector_iterator gpu_begin); + vector_iterator gpu_begin); /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. 
*/ @@ -376,6 +386,7 @@ namespace viennacl namespace backend { + template > class mem_handle; } @@ -655,28 +666,27 @@ namespace viennacl viennacl::vector& input2, viennacl::vector& output); - template - viennacl::vector_expression, const vector_base, op_element_binary > - element_prod(vector_base const & v1, vector_base const & v2); - - template - viennacl::vector_expression, const vector_base, op_element_binary > - element_div(vector_base const & v1, vector_base const & v2); + template + viennacl::vector_expression, const vector_base, op_element_binary > + element_prod(vector_base const & v1, vector_base const & v2); + template + viennacl::vector_expression, const vector_base, op_element_binary > + element_div(vector_base const & v1, vector_base const & v2); - template - void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result); - template + template void inner_prod_impl(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, scalar & result); - template - void inner_prod_impl(vector_base const & vec1, + template + void inner_prod_impl(vector_base const & vec1, viennacl::vector_expression const & vec2, scalar & result); @@ -688,18 +698,18 @@ namespace viennacl /////////////////////////// - template - void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result); - template + template void inner_prod_cpu(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, T & result); - template - void inner_prod_cpu(vector_base const & vec1, + template + void inner_prod_cpu(vector_base const & vec1, viennacl::vector_expression const & vec2, T & result); @@ -712,16 +722,16 @@ namespace viennacl //forward definition of norm_1_impl function - template - void norm_1_impl(vector_base const & vec, scalar & result); + template + void norm_1_impl(vector_base const & vec, scalar & result); template void norm_1_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_1_cpu(vector_base const & vec, + template + void norm_1_cpu(vector_base const & vec, T & result); template @@ -729,15 +739,15 @@ namespace viennacl S2 & result); //forward definition of norm_2_impl function - template - void norm_2_impl(vector_base const & vec, scalar & result); + template + void norm_2_impl(vector_base const & vec, scalar & result); template void norm_2_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_2_cpu(vector_base const & vec, T & result); + template + void norm_2_cpu(vector_base const & vec, T & result); template void norm_2_cpu(viennacl::vector_expression const & vec, @@ -745,64 +755,64 @@ namespace viennacl //forward definition of norm_inf_impl function - template - void norm_inf_impl(vector_base const & vec, scalar & result); + template + void norm_inf_impl(vector_base const & vec, scalar & result); template void norm_inf_impl(viennacl::vector_expression const & vec, scalar & result); - template - void norm_inf_cpu(vector_base const & vec, T & result); + template + void norm_inf_cpu(vector_base const & vec, T & result); template void norm_inf_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of max()-related functions - template - void max_impl(vector_base const & vec, scalar & result); + template + void 
max_impl(vector_base const & vec, scalar & result); template void max_impl(viennacl::vector_expression const & vec, scalar & result); - template - void max_cpu(vector_base const & vec, T & result); + template + void max_cpu(vector_base const & vec, T & result); template void max_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of min()-related functions - template - void min_impl(vector_base const & vec, scalar & result); + template + void min_impl(vector_base const & vec, scalar & result); template void min_impl(viennacl::vector_expression const & vec, scalar & result); - template - void min_cpu(vector_base const & vec, T & result); + template + void min_cpu(vector_base const & vec, T & result); template void min_cpu(viennacl::vector_expression const & vec, S2 & result); //forward definition of sum()-related functions - template - void sum_impl(vector_base const & vec, scalar & result); + template + void sum_impl(vector_base const & vec, scalar & result); template void sum_impl(viennacl::vector_expression const & vec, scalar & result); - template - void sum_cpu(vector_base const & vec, T & result); + template + void sum_cpu(vector_base const & vec, T & result); template void sum_cpu(viennacl::vector_expression const & vec, @@ -817,8 +827,8 @@ namespace viennacl void norm_frobenius_cpu(matrix_base const & vec, T & result); - template - vcl_size_t index_norm_inf(vector_base const & vec); + template + vcl_size_t index_norm_inf(vector_base const & vec); template vcl_size_t index_norm_inf(viennacl::vector_expression const & vec); @@ -1023,8 +1033,6 @@ namespace viennacl } } - /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ - namespace ocl {} /** @brief Namespace containing many meta-functions. */ namespace result_of {} diff --git a/viennacl/hankel_matrix.hpp b/viennacl/hankel_matrix.hpp index 084e6c8717cdccc7eb35462b05ba5406f0165fd3..e50e04b8f05a94adf8c1a9df6e2134b9679cf8d2 100644 --- a/viennacl/hankel_matrix.hpp +++ b/viennacl/hankel_matrix.hpp @@ -43,7 +43,7 @@ template class hankel_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/hyb_matrix.hpp b/viennacl/hyb_matrix.hpp index e93ede5f5fdf99b6c7d317752050051aefbdf493..d54f6db5cd1bfac4d77211306405c9959d836565 100644 --- a/viennacl/hyb_matrix.hpp +++ b/viennacl/hyb_matrix.hpp @@ -38,7 +38,7 @@ template handle_type; typedef scalar::ResultType> value_type; hyb_matrix() : csr_threshold_(NumericT(0.8)), rows_(0), cols_(0) {} diff --git a/viennacl/linalg/cuda/common.hpp b/viennacl/linalg/cuda/common.hpp index 562d558cf9faa739f7f5a91ca32e2514b01fde64..52811867a3a80337809b674d730c2ef09382dc84 100644 --- a/viennacl/linalg/cuda/common.hpp +++ b/viennacl/linalg/cuda/common.hpp @@ -103,14 +103,14 @@ const NumericT * cuda_arg(matrix_base const & obj) /** @brief Convenience helper function for extracting the CUDA handle from a generic memory handle. Non-const version. */ template -ReturnT * cuda_arg(viennacl::backend::mem_handle & h) +ReturnT * cuda_arg(viennacl::backend::mem_handle<> & h) { return reinterpret_cast(h.cuda_handle().get()); } /** @brief Convenience helper function for extracting the CUDA handle from a generic memory handle. Const-version. 
*/ template -ReturnT const * cuda_arg(viennacl::backend::mem_handle const & h) +ReturnT const * cuda_arg(viennacl::backend::mem_handle<> const & h) { return reinterpret_cast(h.cuda_handle().get()); } diff --git a/viennacl/linalg/cuda/ilu_operations.hpp b/viennacl/linalg/cuda/ilu_operations.hpp index 302a73cc7afefa2ba9ddbd55aca7cb15c8594344..f15eb944c80b96d85f128290b00be61522346bd2 100644 --- a/viennacl/linalg/cuda/ilu_operations.hpp +++ b/viennacl/linalg/cuda/ilu_operations.hpp @@ -285,7 +285,7 @@ template void icc_chow_patel_sweep(compressed_matrix & L, vector const & aij_L) { - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); @@ -578,11 +578,11 @@ void ilu_chow_patel_sweep(compressed_matrix & L, compressed_matrix & U_trans, vector const & aij_U_trans) { - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - viennacl::backend::mem_handle U_backup; + viennacl::backend::mem_handle<> U_backup; viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans)); viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size()); diff --git a/viennacl/linalg/cuda/misc_operations.hpp b/viennacl/linalg/cuda/misc_operations.hpp index 4821f5b4eddad8b4f9b681b7f0b9bb1c401bb3ac..7b834904eafe9c87d876cb6bff835c46781bd91b 100644 --- a/viennacl/linalg/cuda/misc_operations.hpp +++ b/viennacl/linalg/cuda/misc_operations.hpp @@ -66,10 +66,10 @@ __global__ void level_scheduling_substitute_kernel( template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/cuda/sparse_matrix_operations.hpp b/viennacl/linalg/cuda/sparse_matrix_operations.hpp index 51d99e135fbde609f190a7820b7e0f5982f3bb77..467f8772a706aaca7d6e6da15be34b8d1018d68a 100644 --- a/viennacl/linalg/cuda/sparse_matrix_operations.hpp +++ b/viennacl/linalg/cuda/sparse_matrix_operations.hpp @@ -970,7 +970,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & /* L_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_lower_tag) @@ -989,7 +989,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & U_diagonal, vector_base & vec, viennacl::linalg::upper_tag) diff --git a/viennacl/linalg/cuda/vector_operations.hpp 
b/viennacl/linalg/cuda/vector_operations.hpp index 61274b75f4cb62eca85e189d4099d3bb92cc6c80..026a3a98cee7e37a963e76d1681d45e62aac9259 100644 --- a/viennacl/linalg/cuda/vector_operations.hpp +++ b/viennacl/linalg/cuda/vector_operations.hpp @@ -2753,7 +2753,7 @@ vcl_size_t index_norm_inf(vector_base const & vec1) { typedef NumericT value_type; - viennacl::backend::mem_handle h; + viennacl::backend::mem_handle<> h; viennacl::backend::memory_create(h, sizeof(unsigned int), viennacl::traits::context(vec1)); index_norm_inf_kernel<<<1, 128>>>(viennacl::cuda_arg(vec1), @@ -2965,7 +2965,7 @@ namespace detail vcl_size_t block_num = 128; vcl_size_t threads_per_block = 128; - viennacl::backend::mem_handle cuda_carries; + viennacl::backend::mem_handle<> cuda_carries; viennacl::backend::memory_create(cuda_carries, sizeof(NumericT)*block_num, viennacl::traits::context(input)); // First step: Scan within each thread group and write carries diff --git a/viennacl/linalg/detail/ilu/block_ilu.hpp b/viennacl/linalg/detail/ilu/block_ilu.hpp index 1540e2dd7a505ad7c13c75fc6b5cff78a9f91a22..1cb2f4b29a890a74098502a54d3c7fe2fab92ee6 100644 --- a/viennacl/linalg/detail/ilu/block_ilu.hpp +++ b/viennacl/linalg/detail/ilu/block_ilu.hpp @@ -595,7 +595,7 @@ private: ILUTagT tag_; index_vector_type block_indices_; - viennacl::backend::mem_handle gpu_block_indices_; + viennacl::backend::mem_handle<> gpu_block_indices_; viennacl::compressed_matrix gpu_L_trans_; viennacl::compressed_matrix gpu_U_trans_; viennacl::vector gpu_D_; diff --git a/viennacl/linalg/detail/ilu/common.hpp b/viennacl/linalg/detail/ilu/common.hpp index 93b0cba88b842124f8ad00059412abfce891a47c..cb8b81f5061481f39a4fc060885f4fd2f4b514ba 100644 --- a/viennacl/linalg/detail/ilu/common.hpp +++ b/viennacl/linalg/detail/ilu/common.hpp @@ -50,10 +50,10 @@ namespace detail template void level_scheduling_setup_impl(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list, bool setup_U) { @@ -119,19 +119,19 @@ void level_scheduling_setup_impl(viennacl::compressed_matrix 0) { - row_index_arrays.push_back(viennacl::backend::mem_handle()); + row_index_arrays.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(row_index_arrays.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_row_index_array(row_index_arrays.back(), num_tainted_cols); - row_buffers.push_back(viennacl::backend::mem_handle()); + row_buffers.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(row_buffers.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_row_buffer(row_buffers.back(), num_tainted_cols + 1); - col_buffers.push_back(viennacl::backend::mem_handle()); + col_buffers.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(col_buffers.back(), viennacl::traits::context(LU)); viennacl::backend::typesafe_host_array elim_col_buffer(col_buffers.back(), num_entries); - element_buffers.push_back(viennacl::backend::mem_handle()); + element_buffers.push_back(viennacl::backend::mem_handle<>()); viennacl::backend::switch_memory_context(element_buffers.back(), viennacl::traits::context(LU)); std::vector elim_elements_buffer(num_entries); @@ -190,10 
+190,10 @@ void level_scheduling_setup_impl(viennacl::compressed_matrix void level_scheduling_setup_L(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list) { level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, false); @@ -207,10 +207,10 @@ void level_scheduling_setup_L(viennacl::compressed_matrix template void level_scheduling_setup_U(viennacl::compressed_matrix const & LU, viennacl::vector const & diagonal_LU, - std::list & row_index_arrays, - std::list & row_buffers, - std::list & col_buffers, - std::list & element_buffers, + std::list> & row_index_arrays, + std::list> & row_buffers, + std::list> & col_buffers, + std::list> & element_buffers, std::list & row_elimination_num_list) { level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, true); @@ -222,13 +222,13 @@ void level_scheduling_setup_U(viennacl::compressed_matrix // template void level_scheduling_substitute(viennacl::vector & vec, - std::list const & row_index_arrays, - std::list const & row_buffers, - std::list const & col_buffers, - std::list const & element_buffers, + std::list> const & row_index_arrays, + std::list> const & row_buffers, + std::list> const & col_buffers, + std::list> const & element_buffers, std::list const & row_elimination_num_list) { - typedef typename std::list< viennacl::backend::mem_handle >::const_iterator ListIterator; + typedef typename std::list< viennacl::backend::mem_handle<> >::const_iterator ListIterator; ListIterator row_index_array_it = row_index_arrays.begin(); ListIterator row_buffers_it = row_buffers.begin(); ListIterator col_buffers_it = col_buffers.begin(); diff --git a/viennacl/linalg/detail/ilu/ilu0.hpp b/viennacl/linalg/detail/ilu/ilu0.hpp index 1c3191a7e5ca7b005bd52ebdca5b409a4373bc78..67f2f7fcfea3ddb0dfa1a1e384ae6e0e8bd1cdd0 100644 --- a/viennacl/linalg/detail/ilu/ilu0.hpp +++ b/viennacl/linalg/detail/ilu/ilu0.hpp @@ -304,22 +304,22 @@ private: // // L: - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_index_arrays_.begin(); it != multifrontal_L_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_buffers_.begin(); it != multifrontal_L_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_col_buffers_.begin(); it != multifrontal_L_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + for (typename std::list< 
viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_element_buffers_.begin(); it != multifrontal_L_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -329,22 +329,22 @@ private: viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_index_arrays_.begin(); it != multifrontal_U_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_buffers_.begin(); it != multifrontal_U_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_col_buffers_.begin(); it != multifrontal_U_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_element_buffers_.begin(); it != multifrontal_U_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -354,17 +354,17 @@ private: ilu0_tag tag_; viennacl::compressed_matrix LU_; - std::list multifrontal_L_row_index_arrays_; - std::list multifrontal_L_row_buffers_; - std::list multifrontal_L_col_buffers_; - std::list multifrontal_L_element_buffers_; + std::list> multifrontal_L_row_index_arrays_; + std::list> multifrontal_L_row_buffers_; + std::list> multifrontal_L_col_buffers_; + std::list> multifrontal_L_element_buffers_; std::list multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; - std::list multifrontal_U_row_index_arrays_; - std::list multifrontal_U_row_buffers_; - std::list multifrontal_U_col_buffers_; - std::list multifrontal_U_element_buffers_; + std::list> multifrontal_U_row_index_arrays_; + std::list> multifrontal_U_row_buffers_; + std::list> multifrontal_U_col_buffers_; + std::list> multifrontal_U_element_buffers_; std::list multifrontal_U_row_elimination_num_list_; }; diff --git a/viennacl/linalg/detail/ilu/ilut.hpp b/viennacl/linalg/detail/ilu/ilut.hpp index 9c0dd966537fd6dbdc528df40bb70c1a27602be7..650d45219424c19b462c6f3f73e788ab3cbb75bb 100644 --- a/viennacl/linalg/detail/ilu/ilut.hpp +++ b/viennacl/linalg/detail/ilu/ilut.hpp @@ -611,22 +611,22 @@ private: // L: - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_row_index_arrays_.begin(); it != multifrontal_L_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = 
multifrontal_L_row_buffers_.begin(); it != multifrontal_L_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_col_buffers_.begin(); it != multifrontal_L_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_L_element_buffers_.begin(); it != multifrontal_L_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -636,22 +636,22 @@ private: viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_index_arrays_.begin(); it != multifrontal_U_row_index_arrays_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_row_buffers_.begin(); it != multifrontal_U_row_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_col_buffers_.begin(); it != multifrontal_U_col_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + for (typename std::list< viennacl::backend::mem_handle<> >::iterator it = multifrontal_U_element_buffers_.begin(); it != multifrontal_U_element_buffers_.end(); ++it) viennacl::backend::switch_memory_context(*it, viennacl::traits::context(mat)); @@ -663,17 +663,17 @@ private: viennacl::compressed_matrix L_; viennacl::compressed_matrix U_; - std::list multifrontal_L_row_index_arrays_; - std::list multifrontal_L_row_buffers_; - std::list multifrontal_L_col_buffers_; - std::list multifrontal_L_element_buffers_; + std::list> multifrontal_L_row_index_arrays_; + std::list> multifrontal_L_row_buffers_; + std::list> multifrontal_L_col_buffers_; + std::list> multifrontal_L_element_buffers_; std::list multifrontal_L_row_elimination_num_list_; viennacl::vector multifrontal_U_diagonal_; - std::list multifrontal_U_row_index_arrays_; - std::list multifrontal_U_row_buffers_; - std::list multifrontal_U_col_buffers_; - std::list multifrontal_U_element_buffers_; + std::list> multifrontal_U_row_index_arrays_; + std::list> multifrontal_U_row_buffers_; + std::list> multifrontal_U_col_buffers_; + std::list> multifrontal_U_element_buffers_; std::list multifrontal_U_row_elimination_num_list_; mutable viennacl::vector x_k_; diff --git a/viennacl/linalg/detail/op_executor.hpp b/viennacl/linalg/detail/op_executor.hpp index 
bd49b3bdc308526e6486c47b364de2801cf37c7c..68c8a8679f52927776fad1c4a2209ea6a2cca553 100644 --- a/viennacl/linalg/detail/op_executor.hpp +++ b/viennacl/linalg/detail/op_executor.hpp @@ -32,27 +32,27 @@ namespace linalg namespace detail { -template -bool op_aliasing(vector_base const & /*lhs*/, B const & /*b*/) +template +bool op_aliasing(vector_base const & /*lhs*/, B const & /*b*/) { return false; } -template -bool op_aliasing(vector_base const & lhs, vector_base const & b) +template +bool op_aliasing(vector_base const & lhs, vector_base const & b) { return lhs.handle() == b.handle(); } -template -bool op_aliasing(vector_base const & lhs, vector_expression const & rhs) +template +bool op_aliasing(vector_base const & lhs, vector_expression const & rhs) { return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); } -template -bool op_aliasing(matrix_base const & /*lhs*/, B const & /*b*/) +template +bool op_aliasing(matrix_base const & /*lhs*/, B const & /*b*/) { return false; } diff --git a/viennacl/linalg/host_based/misc_operations.hpp b/viennacl/linalg/host_based/misc_operations.hpp index 11061d935f290b4727f2c54fcfcf147c4605dc67..ce1f774faa7f3b38c4eb50ad54773ad32b57e7ca 100644 --- a/viennacl/linalg/host_based/misc_operations.hpp +++ b/viennacl/linalg/host_based/misc_operations.hpp @@ -40,10 +40,10 @@ namespace detail { template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/host_based/sparse_matrix_operations.hpp b/viennacl/linalg/host_based/sparse_matrix_operations.hpp index 3cb738d14db36f7602043babb669570f8d838f9c..d535effd0ad9f768116d5afccdc839d62035b1f3 100644 --- a/viennacl/linalg/host_based/sparse_matrix_operations.hpp +++ b/viennacl/linalg/host_based/sparse_matrix_operations.hpp @@ -868,7 +868,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /* block_indices */, vcl_size_t /* num_blocks */, vector_base const & /* L_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_lower_tag) @@ -899,7 +899,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /*block_indices*/, vcl_size_t /* num_blocks */, vector_base const & L_diagonal, vector_base & vec, viennacl::linalg::lower_tag) @@ -935,7 +935,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - viennacl::backend::mem_handle const & /*block_indices*/, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /*block_indices*/, vcl_size_t /* num_blocks */, vector_base const & /* U_diagonal */, //ignored vector_base & vec, viennacl::linalg::unit_upper_tag) @@ -968,7 +968,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & U, - 
viennacl::backend::mem_handle const & /* block_indices */, vcl_size_t /* num_blocks */, + viennacl::backend::mem_handle<> const & /* block_indices */, vcl_size_t /* num_blocks */, vector_base const & U_diagonal, vector_base & vec, viennacl::linalg::upper_tag) diff --git a/viennacl/linalg/host_based/vector_operations.hpp b/viennacl/linalg/host_based/vector_operations.hpp index 01d87166c93c899998ccc8e37e89610c968394a6..6c41ba6db81463bbf148b3f33ffa62a2099d8dfc 100644 --- a/viennacl/linalg/host_based/vector_operations.hpp +++ b/viennacl/linalg/host_based/vector_operations.hpp @@ -64,8 +64,8 @@ namespace detail // // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! // -template -void convert(vector_base & dest, vector_base const & src) +template +void convert(vector_base & dest, vector_base const & src) { DestNumericT * data_dest = detail::extract_raw_pointer(dest); SrcNumericT const * data_src = detail::extract_raw_pointer(src); @@ -84,9 +84,9 @@ void convert(vector_base & dest, vector_base const & data_dest[static_cast(i)*inc_dest+start_dest] = static_cast(data_src[static_cast(i)*inc_src+start_src]); } -template -void av(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) +template +void av(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) { typedef NumericT value_type; @@ -123,10 +123,10 @@ void av(vector_base & vec1, } -template -void avbv(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta) { typedef NumericT value_type; @@ -193,10 +193,10 @@ void avbv(vector_base & vec1, } -template -void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) { typedef NumericT value_type; @@ -271,8 +271,8 @@ void avbv_v(vector_base & vec1, * @param alpha The value to be assigned * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). 
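The host-backend kernels whose template heads are mangled above (convert, av, avbv, avbv_v, ...) all share one structure: extract raw pointers plus (start, stride) layout data, normalise the scaling coefficient according to the reciprocal/flip-sign flags, and run an OpenMP-parallel strided loop. A self-contained sketch of the core of av(), i.e. vec1 = alpha * vec2 (plain pointers and names are illustrative; the real code obtains them via detail::extract_raw_pointer() and viennacl::traits::start()/stride()):

    #include <cstddef>

    template<typename NumericT>
    void av_sketch(NumericT * v1, std::size_t start1, std::size_t inc1,
                   NumericT const * v2, std::size_t start2, std::size_t inc2,
                   std::size_t size, NumericT alpha,
                   bool reciprocal_alpha, bool flip_sign_alpha)
    {
      if (flip_sign_alpha)  alpha = -alpha;
      if (reciprocal_alpha) alpha = NumericT(1) / alpha;

    #ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
    #endif
      for (long i = 0; i < static_cast<long>(size); ++i)
        v1[static_cast<std::size_t>(i) * inc1 + start1]
          = alpha * v2[static_cast<std::size_t>(i) * inc2 + start2];
    }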
*/ -template -void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) +template +void vector_assign(vector_base & vec1, const NumericT & alpha, bool up_to_internal_size = false) { typedef NumericT value_type; @@ -298,8 +298,8 @@ void vector_assign(vector_base & vec1, const NumericT & alpha, bool up * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ -template -void vector_swap(vector_base & vec1, vector_base & vec2) +template +void vector_swap(vector_base & vec1, vector_base & vec2) { typedef NumericT value_type; @@ -332,9 +332,9 @@ void vector_swap(vector_base & vec1, vector_base & vec2) * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -365,9 +365,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding alpha, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const NumericT, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const NumericT, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -394,9 +394,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, alpha and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, op_element_binary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -425,9 +425,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_unary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_unary > const & proxy) { typedef NumericT value_type; typedef viennacl::linalg::detail::op_applier > OpFunctor; @@ -553,9 +553,9 @@ VIENNACL_INNER_PROD_IMPL_2(double) * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, ScalarT & result) { typedef NumericT value_type; @@ -574,10 +574,10 @@ void inner_prod_impl(vector_base const & vec1, data_vec2, start2, inc2); //Note: Assignment to result might be expensive, thus a temporary is introduced here } -template -void inner_prod_impl(vector_base const & x, - vector_tuple const & vec_tuple, - vector_base & result) +template +void inner_prod_impl(vector_base const & x, + vector_tuple const & vec_tuple, + vector_base & result) { typedef NumericT value_type; @@ -704,8 +704,8 @@ VIENNACL_NORM_1_IMPL_2(double, double) * @param vec1 The vector * @param result The 
result scalar */ -template -void norm_1_impl(vector_base const & vec1, +template +void norm_1_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -817,8 +817,8 @@ VIENNACL_NORM_2_IMPL_2(double, double) * @param vec1 The vector * @param result The result scalar */ -template -void norm_2_impl(vector_base const & vec1, +template +void norm_2_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -837,8 +837,8 @@ void norm_2_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void norm_inf_impl(vector_base const & vec1, +template +void norm_inf_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -887,8 +887,8 @@ void norm_inf_impl(vector_base const & vec1, * @param vec1 The vector * @return The result. Note that the result must be a CPU scalar (unsigned int), since gpu scalars are floating point types. */ -template -vcl_size_t index_norm_inf(vector_base const & vec1) +template +vcl_size_t index_norm_inf(vector_base const & vec1) { typedef NumericT value_type; @@ -947,8 +947,8 @@ vcl_size_t index_norm_inf(vector_base const & vec1) * @param vec1 The vector * @param result The result scalar */ -template -void max_impl(vector_base const & vec1, +template +void max_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -996,8 +996,8 @@ void max_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void min_impl(vector_base const & vec1, +template +void min_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -1045,8 +1045,8 @@ void min_impl(vector_base const & vec1, * @param vec1 The vector * @param result The result scalar */ -template -void sum_impl(vector_base const & vec1, +template +void sum_impl(vector_base const & vec1, ScalarT & result) { typedef NumericT value_type; @@ -1076,9 +1076,9 @@ void sum_impl(vector_base const & vec1, * @param alpha The first transformation coefficient * @param beta The second transformation coefficient */ -template -void plane_rotation(vector_base & vec1, - vector_base & vec2, +template +void plane_rotation(vector_base & vec1, + vector_base & vec2, NumericT alpha, NumericT beta) { typedef NumericT value_type; @@ -1112,9 +1112,9 @@ void plane_rotation(vector_base & vec1, namespace detail { /** @brief Implementation of inclusive_scan and exclusive_scan for the host (OpenMP) backend. */ - template - void vector_scan_impl(vector_base const & vec1, - vector_base & vec2, + template + void vector_scan_impl(vector_base const & vec1, + vector_base & vec2, bool is_inclusive) { NumericT const * data_vec1 = detail::extract_raw_pointer(vec1); @@ -1217,9 +1217,9 @@ namespace detail * @param vec1 Input vector: Gets overwritten by the routine. * @param vec2 The output vector. Either idential to vec1 or non-overlapping. */ -template -void inclusive_scan(vector_base const & vec1, - vector_base & vec2) +template +void inclusive_scan(vector_base const & vec1, + vector_base & vec2) { detail::vector_scan_impl(vec1, vec2, true); } @@ -1232,9 +1232,9 @@ void inclusive_scan(vector_base const & vec1, * @param vec1 Input vector: Gets overwritten by the routine. * @param vec2 The output vector. Either idential to vec1 or non-overlapping. 
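The detail::vector_scan_impl() routine touched above implements both prefix-sum flavours through its is_inclusive flag, with inclusive_scan() and exclusive_scan() as thin wrappers around it. A sequential, self-contained sketch of the semantics (the real host backend additionally parallelises this in blocks with OpenMP):

    #include <cstddef>
    #include <vector>

    template<typename NumericT>
    void scan_sketch(std::vector<NumericT> const & in, std::vector<NumericT> & out, bool is_inclusive)
    {
      out.resize(in.size());
      NumericT running_sum = NumericT(0);
      for (std::size_t i = 0; i < in.size(); ++i)
      {
        if (is_inclusive) { running_sum += in[i]; out[i] = running_sum; }  // (x0, x0+x1, x0+x1+x2, ...)
        else              { out[i] = running_sum; running_sum += in[i]; }  // (0,  x0,    x0+x1,    ...)
      }
    }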
*/ -template -void exclusive_scan(vector_base const & vec1, - vector_base & vec2) +template +void exclusive_scan(vector_base const & vec1, + vector_base & vec2) { detail::vector_scan_impl(vec1, vec2, false); } diff --git a/viennacl/linalg/misc_operations.hpp b/viennacl/linalg/misc_operations.hpp index 208573fd49b1cbb88b7e32384533b1c485ced068..bf42cc9ac010c90a8c729175d975611ffec25ae1 100644 --- a/viennacl/linalg/misc_operations.hpp +++ b/viennacl/linalg/misc_operations.hpp @@ -47,10 +47,10 @@ namespace viennacl template void level_scheduling_substitute(vector & vec, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/mixed_precision_cg.hpp b/viennacl/linalg/mixed_precision_cg.hpp index 78254b34a47b26759bb0c5be3b6d9a2f013e6952..fa4fc7a5bfdf7bc6c58b59aea32fcca0dc5e4fd6 100644 --- a/viennacl/linalg/mixed_precision_cg.hpp +++ b/viennacl/linalg/mixed_precision_cg.hpp @@ -128,10 +128,10 @@ namespace viennacl // transfer matrix to single precision: viennacl::compressed_matrix matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs)); - viennacl::backend::memory_copy(matrix.handle1(), const_cast(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() ); - viennacl::backend::memory_copy(matrix.handle2(), const_cast(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() ); + viennacl::backend::memory_copy(matrix.handle1(), const_cast &>(matrix_low_precision.handle1()), 0, 0, matrix_low_precision.handle1().raw_size() ); + viennacl::backend::memory_copy(matrix.handle2(), const_cast &>(matrix_low_precision.handle2()), 0, 0, matrix_low_precision.handle2().raw_size() ); - viennacl::vector_base matrix_elements_high_precision(const_cast(matrix.handle()), matrix.nnz(), 0, 1); + viennacl::vector_base matrix_elements_high_precision(const_cast &>(matrix.handle()), matrix.nnz(), 0, 1); viennacl::vector_base matrix_elements_low_precision(matrix_low_precision.handle(), matrix.nnz(), 0, 1); matrix_elements_low_precision = matrix_elements_high_precision; matrix_low_precision.generate_row_block_information(); diff --git a/viennacl/linalg/norm_2.hpp b/viennacl/linalg/norm_2.hpp index babb2856744141cf9789fe8ddbf80f3bd9e549ea..26a2da01f0f162f26172a0b1b38bd370fd9594ae 100644 --- a/viennacl/linalg/norm_2.hpp +++ b/viennacl/linalg/norm_2.hpp @@ -105,15 +105,15 @@ namespace viennacl // ---------------------------------------------------- // VIENNACL // - template< typename ScalarType> - viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + template< typename ScalarType, typename H=viennacl::ocl::handle > + viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 > - norm_2(viennacl::vector_base const & v) + norm_2(viennacl::vector_base const & v) { //std::cout << "viennacl .. 
" << std::endl; - return viennacl::scalar_expression< const viennacl::vector_base, - const viennacl::vector_base, + return viennacl::scalar_expression< const viennacl::vector_base, + const viennacl::vector_base, viennacl::op_norm_2 >(v, v); } diff --git a/viennacl/linalg/opencl/ilu_operations.hpp b/viennacl/linalg/opencl/ilu_operations.hpp index 248a88ac8ab69f5bd8106dbcdbcaee26602b5350..e67b8c53fb93b61fb55cdfea2c29a1d3fb1977c0 100644 --- a/viennacl/linalg/opencl/ilu_operations.hpp +++ b/viennacl/linalg/opencl/ilu_operations.hpp @@ -119,7 +119,7 @@ void icc_chow_patel_sweep(compressed_matrix & L, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(L).context()); viennacl::linalg::opencl::kernels::ilu::init(ctx); - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); @@ -218,11 +218,11 @@ void ilu_chow_patel_sweep(compressed_matrix & L, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(L).context()); viennacl::linalg::opencl::kernels::ilu::init(ctx); - viennacl::backend::mem_handle L_backup; + viennacl::backend::mem_handle<> L_backup; viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - viennacl::backend::mem_handle U_backup; + viennacl::backend::mem_handle<> U_backup; viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans)); viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size()); diff --git a/viennacl/linalg/opencl/misc_operations.hpp b/viennacl/linalg/opencl/misc_operations.hpp index 83a3db77f6a88134872abdddbb930d626a9c317d..38d9d1202ffcc2b1a310644fa27eab9793d52bf8 100644 --- a/viennacl/linalg/opencl/misc_operations.hpp +++ b/viennacl/linalg/opencl/misc_operations.hpp @@ -43,10 +43,10 @@ namespace detail template void level_scheduling_substitute(vector & x, - viennacl::backend::mem_handle const & row_index_array, - viennacl::backend::mem_handle const & row_buffer, - viennacl::backend::mem_handle const & col_buffer, - viennacl::backend::mem_handle const & element_buffer, + viennacl::backend::mem_handle<> const & row_index_array, + viennacl::backend::mem_handle<> const & row_buffer, + viennacl::backend::mem_handle<> const & col_buffer, + viennacl::backend::mem_handle<> const & element_buffer, vcl_size_t num_rows ) { diff --git a/viennacl/linalg/opencl/sparse_matrix_operations.hpp b/viennacl/linalg/opencl/sparse_matrix_operations.hpp index a8d1557b365c7f60f3ace797dae93c901a1495b4..6e55b2c50a177bfba7f79d61b84585959330d304 100644 --- a/viennacl/linalg/opencl/sparse_matrix_operations.hpp +++ b/viennacl/linalg/opencl/sparse_matrix_operations.hpp @@ -483,7 +483,7 @@ namespace detail void block_inplace_solve(const matrix_expression, const compressed_matrix, op_trans> & L, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & /* L_diagonal */, //ignored vector_base & x, viennacl::linalg::unit_lower_tag) @@ -506,7 +506,7 @@ namespace detail void block_inplace_solve(matrix_expression, const compressed_matrix, op_trans> const & U, - viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, + 
viennacl::backend::mem_handle<> const & block_indices, vcl_size_t num_blocks, vector_base const & U_diagonal, vector_base & x, viennacl::linalg::upper_tag) diff --git a/viennacl/linalg/opencl/vector_operations.hpp b/viennacl/linalg/opencl/vector_operations.hpp index 2b1c24ca42f4d42b1c5d95cf422a53d66accfe26..2b3423459fbee568ed1c671dfa35aca989f8861d 100644 --- a/viennacl/linalg/opencl/vector_operations.hpp +++ b/viennacl/linalg/opencl/vector_operations.hpp @@ -42,6 +42,12 @@ #include "viennacl/traits/handle.hpp" #include "viennacl/traits/stride.hpp" +#ifdef VIENNACL_WITH_OPENCL +#define TEMP_HANDLE viennacl::ocl::pooled_clmem_handle +#else +#define TEMP_HANDLE viennacl::ocl::handle +#endif + namespace viennacl { namespace linalg @@ -52,8 +58,8 @@ namespace opencl // // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! // -template -void convert(vector_base & dest, vector_base const & src) +template +void convert(vector_base & dest, vector_base const & src) { assert(viennacl::traits::opencl_handle(dest).context() == viennacl::traits::opencl_handle(src).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -72,9 +78,9 @@ void convert(vector_base & dest, vector_base const & } -template -void av(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) +template +void av(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -112,10 +118,10 @@ void av(vector_base & vec1, } -template -void avbv(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -174,10 +180,10 @@ void avbv(vector_base & vec1, } -template -void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) +template +void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -242,8 +248,8 @@ void avbv_v(vector_base & vec1, * @param alpha The value to be assigned * @param up_to_internal_size Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). */ -template -void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) +template +void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); viennacl::linalg::opencl::kernels::vector::init(ctx); @@ -268,8 +274,8 @@ void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_s * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ -template -void vector_swap(vector_base & vec1, vector_base & vec2) +template +void vector_swap(vector_base & vec1, vector_base & vec2) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -296,9 +302,9 @@ void vector_swap(vector_base & vec1, vector_base & vec2) * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -343,9 +349,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const T, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const T, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -387,9 +393,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, op_element_binary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, op_element_binary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -432,9 +438,9 @@ void element_op(vector_base & vec1, * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2 and the operation */ -template -void element_op(vector_base & vec1, - vector_expression, const vector_base, op_element_unary > const & proxy) +template +void element_op(vector_base & vec1, + vector_expression, const vector_base, op_element_unary > const & proxy) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -471,10 +477,10 @@ void element_op(vector_base & vec1, * @param vec2 The second vector * @param partial_result The results of each group */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, - vector_base & partial_result) +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, + vector_base & partial_result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -520,9 +526,9 @@ void inner_prod_impl(vector_base const & vec1, * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -531,7 +537,7 @@ void inner_prod_impl(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -571,10 +577,10 @@ namespace detail * @param vec_tuple The tuple of vectors y1, y2, ..., yN * @param result The result vector */ -template -void inner_prod_impl(vector_base const & x, - vector_tuple const & vec_tuple, - vector_base & result) +template +void inner_prod_impl(vector_base const & x, + vector_tuple const & vec_tuple, + vector_base & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -604,10 +610,10 @@ void inner_prod_impl(vector_base const & x, case 5: case 4: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); - vector_base const & y3 = vec_tuple.const_at(current_index + 3); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = vec_tuple.const_at(current_index + 2); + vector_base const & y3 = vec_tuple.const_at(current_index + 3); viennacl::ocl::enqueue(inner_prod_kernel_4( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -632,9 +638,9 @@ void inner_prod_impl(vector_base const & x, case 3: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = vec_tuple.const_at(current_index + 2); viennacl::ocl::enqueue(inner_prod_kernel_3( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -658,8 +664,8 @@ void inner_prod_impl(vector_base const & x, case 2: { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); viennacl::ocl::enqueue(inner_prod_kernel_2( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -682,7 +688,7 @@ void inner_prod_impl(vector_base const & x, case 1: { - vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y0 = vec_tuple.const_at(current_index ); viennacl::ocl::enqueue(inner_prod_kernel_1( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::ocl::local_mem(sizeof(typename 
viennacl::result_of::cl_type::type) * 1 * inner_prod_kernel_1.local_work_size()), @@ -704,14 +710,14 @@ void inner_prod_impl(vector_base const & x, default: //8 or more vectors { - vector_base const & y0 = vec_tuple.const_at(current_index ); - vector_base const & y1 = vec_tuple.const_at(current_index + 1); - vector_base const & y2 = vec_tuple.const_at(current_index + 2); - vector_base const & y3 = vec_tuple.const_at(current_index + 3); - vector_base const & y4 = vec_tuple.const_at(current_index + 4); - vector_base const & y5 = vec_tuple.const_at(current_index + 5); - vector_base const & y6 = vec_tuple.const_at(current_index + 6); - vector_base const & y7 = vec_tuple.const_at(current_index + 7); + vector_base const & y0 = vec_tuple.const_at(current_index ); + vector_base const & y1 = vec_tuple.const_at(current_index + 1); + vector_base const & y2 = vec_tuple.const_at(current_index + 2); + vector_base const & y3 = vec_tuple.const_at(current_index + 3); + vector_base const & y4 = vec_tuple.const_at(current_index + 4); + vector_base const & y5 = vec_tuple.const_at(current_index + 5); + vector_base const & y6 = vec_tuple.const_at(current_index + 6); + vector_base const & y7 = vec_tuple.const_at(current_index + 7); viennacl::ocl::enqueue(inner_prod_kernel_8( viennacl::traits::opencl_handle(x), layout_x, viennacl::traits::opencl_handle(y0), detail::make_layout(y0), viennacl::traits::opencl_handle(y1), detail::make_layout(y1), @@ -752,9 +758,9 @@ void inner_prod_impl(vector_base const & x, * @param vec2 The second vector * @param result The result scalar (on the gpu) */ -template -void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, +template +void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -762,7 +768,7 @@ void inner_prod_cpu(vector_base const & vec1, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec1).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec1)); temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: // Step 1: Compute partial inner products for each work group: @@ -788,9 +794,9 @@ void inner_prod_cpu(vector_base const & vec1, * @param partial_result The result scalar * @param norm_id Norm selector. 0: norm_inf, 1: norm_1, 2: norm_2 */ -template -void norm_reduction_impl(vector_base const & vec, - vector_base & partial_result, +template +void norm_reduction_impl(vector_base const & vec, + vector_base & partial_result, cl_uint norm_id) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -820,8 +826,8 @@ void norm_reduction_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_1_impl(vector_base const & vec, +template +void norm_1_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); @@ -829,7 +835,7 @@ void norm_1_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -853,12 +859,12 @@ void norm_1_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_1_cpu(vector_base const & vec, +template +void norm_1_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 1); @@ -884,8 +890,8 @@ void norm_1_cpu(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_2_impl(vector_base const & vec, +template +void norm_2_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -893,7 +899,7 @@ void norm_2_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -917,12 +923,12 @@ void norm_2_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_2_cpu(vector_base const & vec, +template +void norm_2_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 2); @@ -948,8 +954,8 @@ void norm_2_cpu(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_inf_impl(vector_base const & vec, +template +void norm_inf_impl(vector_base const & vec, scalar & result) { assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
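The OpenCL norm_1/norm_2/norm_inf routines in this file all follow the two-stage reduction visible above: norm_reduction_impl() launches a kernel that leaves one partial result per work group (128 of them) in a temporary vector, and the *_cpu variants finish the reduction on the host. A self-contained sketch of that finishing step, using the norm selector convention documented above (0: norm_inf, 1: norm_1, 2: norm_2):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // One entry of 'partial' per work group.
    double finish_norm_sketch(std::vector<double> const & partial, int norm_id)
    {
      double result = 0;
      for (std::size_t i = 0; i < partial.size(); ++i)
      {
        if (norm_id == 0) result = (partial[i] > result) ? partial[i] : result;  // norm_inf: max of partial maxima
        else              result += partial[i];                                  // norm_1 / norm_2: sum of partial sums
      }
      return (norm_id == 2) ? std::sqrt(result) : result;                        // norm_2 partials are sums of squares
    }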
Automatic migration not yet supported!")); @@ -957,7 +963,7 @@ void norm_inf_impl(vector_base const & vec, viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -981,12 +987,12 @@ void norm_inf_impl(vector_base const & vec, * @param vec The vector * @param result The result scalar */ -template -void norm_inf_cpu(vector_base const & vec, +template +void norm_inf_cpu(vector_base const & vec, T & result) { vcl_size_t work_groups = 128; - viennacl::vector temp(work_groups, viennacl::traits::context(vec)); + viennacl::vector temp(work_groups, viennacl::traits::context(vec)); // Step 1: Compute the partial work group results norm_reduction_impl(vec, temp, 0); @@ -1013,8 +1019,8 @@ void norm_inf_cpu(vector_base const & vec, * @param vec The vector * @return The result. Note that the result must be a CPU scalar (unsigned int), since gpu scalars are floating point types. */ -template -cl_uint index_norm_inf(vector_base const & vec) +template +cl_uint index_norm_inf(vector_base const & vec) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(vec).context()); viennacl::linalg::opencl::kernels::vector::init(ctx); @@ -1049,8 +1055,8 @@ cl_uint index_norm_inf(vector_base const & vec) * @param x The vector * @param result The result scalar */ -template -void max_impl(vector_base const & x, +template +void max_impl(vector_base const & x, scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -1087,8 +1093,8 @@ void max_impl(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void max_cpu(vector_base const & x, +template +void max_cpu(vector_base const & x, NumericT & result) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(x).context()); @@ -1128,8 +1134,8 @@ void max_cpu(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void min_impl(vector_base const & x, +template +void min_impl(vector_base const & x, scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -1166,8 +1172,8 @@ void min_impl(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void min_cpu(vector_base const & x, +template +void min_cpu(vector_base const & x, NumericT & result) { viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(x).context()); @@ -1205,13 +1211,13 @@ void min_cpu(vector_base const & x, * @param x The vector * @param result The result scalar */ -template -void sum_impl(vector_base const & x, - scalar & result) +template +void sum_impl(vector_base const & x, + scalar & result) { assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. 
Automatic migration not yet supported!")); - viennacl::vector all_ones = viennacl::scalar_vector(x.size(), NumericT(1), viennacl::traits::context(x)); + viennacl::vector all_ones = viennacl::scalar_vector(x.size(), NumericT(1), viennacl::traits::context(x)); viennacl::linalg::opencl::inner_prod_impl(x, all_ones, result); } @@ -1239,9 +1245,9 @@ void sum_cpu(vector_base const & x, NumericT & result) * @param alpha The first transformation coefficient * @param beta The second transformation coefficient */ -template -void plane_rotation(vector_base & vec1, - vector_base & vec2, +template +void plane_rotation(vector_base & vec1, + vector_base & vec2, T alpha, T beta) { assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); @@ -1276,15 +1282,15 @@ namespace detail * Note on performance: For non-in-place scans one could optimize away the temporary 'opencl_carries'-array. * This, however, only provides small savings in the latency-dominated regime, yet would effectively double the amount of code to maintain. */ - template - void scan_impl(vector_base const & input, - vector_base & output, + template + void scan_impl(vector_base const & input, + vector_base & output, bool is_inclusive) { vcl_size_t local_worksize = 128; vcl_size_t workgroups = 128; - viennacl::backend::mem_handle opencl_carries; + viennacl::backend::mem_handle
<>
opencl_carries; viennacl::backend::memory_create(opencl_carries, sizeof(NumericT)*workgroups, viennacl::traits::context(input)); viennacl::ocl::context & ctx = const_cast(viennacl::traits::opencl_handle(input).context()); @@ -1321,9 +1327,9 @@ namespace detail * @param input Input vector. * @param output The output vector. Either idential to input or non-overlapping. */ -template -void inclusive_scan(vector_base const & input, - vector_base & output) +template +void inclusive_scan(vector_base const & input, + vector_base & output) { detail::scan_impl(input, output, true); } @@ -1334,9 +1340,9 @@ void inclusive_scan(vector_base const & input, * @param input Input vector * @param output The output vector. Either idential to input or non-overlapping. */ -template -void exclusive_scan(vector_base const & input, - vector_base & output) +template +void exclusive_scan(vector_base const & input, + vector_base & output) { detail::scan_impl(input, output, false); } diff --git a/viennacl/linalg/sparse_matrix_operations.hpp b/viennacl/linalg/sparse_matrix_operations.hpp index dccb330c2e59bd1f83c7b73140a209c43b6aa80c..e6ffdbbbc00dd882c38f7d4caab937f1db16db2e 100644 --- a/viennacl/linalg/sparse_matrix_operations.hpp +++ b/viennacl/linalg/sparse_matrix_operations.hpp @@ -330,7 +330,7 @@ namespace viennacl template typename viennacl::enable_if< viennacl::is_any_sparse_matrix::value>::type block_inplace_solve(const matrix_expression & mat, - viennacl::backend::mem_handle const & block_index_array, vcl_size_t num_blocks, + viennacl::backend::mem_handle<> const & block_index_array, vcl_size_t num_blocks, viennacl::vector_base const & mat_diagonal, viennacl::vector_base & vec, SOLVERTAG tag) diff --git a/viennacl/linalg/vector_operations.hpp b/viennacl/linalg/vector_operations.hpp index 5add9ceaddd3c0d378ebd3400f14f556ab76d385..3c75d03ff14b42fbaf10e26295480390c3a6ecc1 100644 --- a/viennacl/linalg/vector_operations.hpp +++ b/viennacl/linalg/vector_operations.hpp @@ -47,8 +47,8 @@ namespace viennacl { namespace linalg { - template - void convert(vector_base & dest, vector_base const & src) + template + void convert(vector_base & dest, vector_base const & src) { assert(viennacl::traits::size(dest) == viennacl::traits::size(src) && bool("Incompatible vector sizes in v1 = v2 (convert): size(v1) != size(v2)")); @@ -74,9 +74,9 @@ namespace viennacl } } - template - void av(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) + template + void av(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 = v2 @ alpha: size(v1) != size(v2)")); @@ -103,10 +103,10 @@ namespace viennacl } - template - void avbv(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + template + void avbv(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector 
sizes in v1 = v2 @ alpha + v3 @ beta: size(v1) != size(v2)")); assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 = v2 @ alpha + v3 @ beta: size(v2) != size(v3)")); @@ -140,10 +140,10 @@ namespace viennacl } - template - void avbv_v(vector_base & vec1, - vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) + template + void avbv_v(vector_base & vec1, + vector_base const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, + vector_base const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v1) != size(v2)")); assert(viennacl::traits::size(vec2) == viennacl::traits::size(vec3) && bool("Incompatible vector sizes in v1 += v2 @ alpha + v3 @ beta: size(v2) != size(v3)")); @@ -183,8 +183,8 @@ namespace viennacl * @param alpha The value to be assigned * @param up_to_internal_size Whether 'alpha' should be written to padded memory as well. This is used for setting all entries to zero, including padded memory. */ - template - void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) + template + void vector_assign(vector_base & vec1, const T & alpha, bool up_to_internal_size = false) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -214,8 +214,8 @@ namespace viennacl * @param vec1 The first vector (or -range, or -slice) * @param vec2 The second vector (or -range, or -slice) */ - template - void vector_swap(vector_base & vec1, vector_base & vec2) + template + void vector_swap(vector_base & vec1, vector_base & vec2) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2) && bool("Incompatible vector sizes in vector_swap()")); @@ -251,9 +251,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, const vector_base, OP> const & proxy) + template + void element_op(vector_base & vec1, + vector_expression, const vector_base, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ -284,9 +284,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, const T, OP> const & proxy) + template + void element_op(vector_base & vec1, + vector_expression, const T, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ -317,9 +317,9 @@ namespace viennacl * @param vec1 The result vector (or -range, or -slice) * @param proxy The proxy object holding v2, v3 and the operation */ - template - void element_op(vector_base & vec1, - vector_expression, OP> const & proxy) + template + void element_op(vector_base & vec1, + vector_expression, OP> const & proxy) { assert(viennacl::traits::size(vec1) == viennacl::traits::size(proxy) && bool("Incompatible vector sizes in element_op()")); @@ 
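Every frontend routine in viennacl/linalg/vector_operations.hpp follows the dispatch pattern visible in the context lines above: validate sizes, then branch on the active memory domain of the operand's handle. A condensed, illustrative reconstruction for av(), assuming the usual ViennaCL headers (the backend overloads, enum values and exception type are the existing ViennaCL ones; the exact error messages are not shown in this hunk):

    template<typename V1, typename V2, typename S>
    void av_dispatch_sketch(V1 & vec1, V2 const & vec2, S const & alpha,
                            vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
    {
      switch (viennacl::traits::handle(vec1).get_active_handle_id())
      {
      case viennacl::MAIN_MEMORY:
        viennacl::linalg::host_based::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
        break;
    #ifdef VIENNACL_WITH_OPENCL
      case viennacl::OPENCL_MEMORY:
        viennacl::linalg::opencl::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
        break;
    #endif
    #ifdef VIENNACL_WITH_CUDA
      case viennacl::CUDA_MEMORY:
        viennacl::linalg::cuda::av(vec1, vec2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
        break;
    #endif
      case viennacl::MEMORY_NOT_INITIALIZED:
        throw viennacl::memory_exception("not initialised!");
      default:
        throw viennacl::memory_exception("not implemented for this memory domain");
      }
    }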
-349,25 +349,25 @@ namespace viennacl // Helper macro for generating binary element-wise operations such as element_prod(), element_div(), element_pow() without unnecessary code duplication */ #define VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS(OPNAME) \ - template \ - viennacl::vector_expression, const vector_base, op_element_binary > \ - element_##OPNAME(vector_base const & v1, vector_base const & v2) \ + template \ + viennacl::vector_expression, const vector_base, op_element_binary > \ + element_##OPNAME(vector_base const & v1, vector_base const & v2) \ { \ - return viennacl::vector_expression, const vector_base, op_element_binary >(v1, v2); \ + return viennacl::vector_expression, const vector_base, op_element_binary >(v1, v2); \ } \ \ - template \ - viennacl::vector_expression, const vector_base, op_element_binary > \ - element_##OPNAME(vector_expression const & proxy, vector_base const & v2) \ + template \ + viennacl::vector_expression, const vector_base, op_element_binary > \ + element_##OPNAME(vector_expression const & proxy, vector_base const & v2) \ { \ - return viennacl::vector_expression, const vector_base, op_element_binary >(proxy, v2); \ + return viennacl::vector_expression, const vector_base, op_element_binary >(proxy, v2); \ } \ \ - template \ - viennacl::vector_expression, const vector_expression, op_element_binary > \ - element_##OPNAME(vector_base const & v1, vector_expression const & proxy) \ + template \ + viennacl::vector_expression, const vector_expression, op_element_binary > \ + element_##OPNAME(vector_base const & v1, vector_expression const & proxy) \ { \ - return viennacl::vector_expression, const vector_expression, op_element_binary >(v1, proxy); \ + return viennacl::vector_expression, const vector_expression, op_element_binary >(v1, proxy); \ } \ \ template >(proxy1, proxy2); \ }\ \ - template \ - viennacl::vector_expression, const T, op_element_binary > \ - element_##OPNAME(vector_base const & v1, T const & alpha) \ + template \ + viennacl::vector_expression, const T, op_element_binary > \ + element_##OPNAME(vector_base const & v1, T const & alpha) \ { \ - return viennacl::vector_expression, const T, op_element_binary >(v1, alpha); \ + return viennacl::vector_expression, const T, op_element_binary >(v1, alpha); \ } \ \ template \ @@ -397,11 +397,11 @@ namespace viennacl return viennacl::vector_expression, const typename viennacl::result_of::cpu_value_type::type, op_element_binary >(proxy, alpha); \ } \ \ - template \ - viennacl::vector_expression, op_element_binary > \ - element_##OPNAME(T const & alpha, vector_base const & v2) \ + template \ + viennacl::vector_expression, op_element_binary > \ + element_##OPNAME(T const & alpha, vector_base const & v2) \ { \ - return viennacl::vector_expression, op_element_binary >(alpha, v2); \ + return viennacl::vector_expression, op_element_binary >(alpha, v2); \ } \ \ template \ @@ -490,9 +490,9 @@ namespace viennacl * @param vec2 The second vector * @param result The result scalar (on the gpu) */ - template - void inner_prod_impl(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_impl(vector_base const & vec1, + vector_base const & vec2, scalar & result) { assert( vec1.size() == vec2.size() && bool("Size mismatch") ); @@ -520,9 +520,9 @@ namespace viennacl } // vector expression on lhs - template + template void inner_prod_impl(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, scalar & result) { viennacl::vector temp = vec1; @@ 
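The VIENNACL_GENERATE_BINARY_ELEMENTOPERATION_OVERLOADS macro above stamps out the user-facing element_prod(), element_div(), element_pow(), ... overloads for each combination of vector and vector_expression arguments; the patch only threads the additional handle template parameter through them. A usage sketch of the resulting API (conventional ViennaCL calls, shown here only for orientation):

    #include "viennacl/vector.hpp"
    #include "viennacl/linalg/vector_operations.hpp"

    void elementwise_usage_sketch()
    {
      viennacl::vector<float> a(100), b(100), c(100);
      c = viennacl::linalg::element_prod(a, b);   // c_i = a_i * b_i
      c = viennacl::linalg::element_div(a, b);    // c_i = a_i / b_i
      c = viennacl::linalg::element_pow(a, b);    // c_i = pow(a_i, b_i)
    }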
-531,8 +531,8 @@ namespace viennacl // vector expression on rhs - template - void inner_prod_impl(vector_base const & vec1, + template + void inner_prod_impl(vector_base const & vec1, viennacl::vector_expression const & vec2, scalar & result) { @@ -562,9 +562,9 @@ namespace viennacl * @param vec2 The second vector * @param result The result scalar (on the gpu) */ - template - void inner_prod_cpu(vector_base const & vec1, - vector_base const & vec2, + template + void inner_prod_cpu(vector_base const & vec1, + vector_base const & vec2, T & result) { assert( vec1.size() == vec2.size() && bool("Size mismatch") ); @@ -592,9 +592,9 @@ namespace viennacl } // vector expression on lhs - template + template void inner_prod_cpu(viennacl::vector_expression const & vec1, - vector_base const & vec2, + vector_base const & vec2, T & result) { viennacl::vector temp = vec1; @@ -603,8 +603,8 @@ namespace viennacl // vector expression on rhs - template - void inner_prod_cpu(vector_base const & vec1, + template + void inner_prod_cpu(vector_base const & vec1, viennacl::vector_expression const & vec2, T & result) { @@ -633,10 +633,10 @@ namespace viennacl * @param y_tuple A collection of vector, all of the same size. * @param result The result scalar (on the gpu). Needs to match the number of elements in y_tuple */ - template - void inner_prod_impl(vector_base const & x, - vector_tuple const & y_tuple, - vector_base & result) + template + void inner_prod_impl(vector_base const & x, + vector_tuple const & y_tuple, + vector_base & result) { assert( x.size() == y_tuple.const_at(0).size() && bool("Size mismatch") ); assert( result.size() == y_tuple.const_size() && bool("Number of elements does not match result size") ); @@ -669,8 +669,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_1_impl(vector_base const & vec, + template + void norm_1_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -716,8 +716,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_1_cpu(vector_base const & vec, + template + void norm_1_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -763,8 +763,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_2_impl(vector_base const & vec, + template + void norm_2_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -808,8 +808,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_2_cpu(vector_base const & vec, + template + void norm_2_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -855,8 +855,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_inf_impl(vector_base const & vec, + template + void norm_inf_impl(vector_base const & vec, scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -900,8 +900,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void norm_inf_cpu(vector_base const & vec, + template + void norm_inf_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) @@ -948,8 +948,8 @@ namespace viennacl * @param vec The vector * @return The result. 
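For orientation, the dispatchers above are what ultimately run when user code evaluates reductions on a vector: the scalar_expression returned by the convenience functions converts to a CPU value and thereby triggers the *_cpu paths. A usage sketch with the conventional ViennaCL API (its meaning is unchanged by this patch):

    #include "viennacl/vector.hpp"
    #include "viennacl/linalg/inner_prod.hpp"
    #include "viennacl/linalg/norm_1.hpp"
    #include "viennacl/linalg/norm_2.hpp"
    #include "viennacl/linalg/norm_inf.hpp"

    void norm_usage_sketch()
    {
      viennacl::vector<float> x(1000), y(1000);
      float s  = viennacl::linalg::inner_prod(x, y);  // dispatches to inner_prod_cpu()
      float n1 = viennacl::linalg::norm_1(x);         // dispatches to norm_1_cpu()
      float n2 = viennacl::linalg::norm_2(x);         // dispatches to norm_2_cpu()
      float ni = viennacl::linalg::norm_inf(x);       // dispatches to norm_inf_cpu()
      (void)s; (void)n1; (void)n2; (void)ni;
    }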
Note that the result must be a CPU scalar */ - template - vcl_size_t index_norm_inf(vector_base const & vec) + template + vcl_size_t index_norm_inf(vector_base const & vec) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -988,8 +988,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void max_impl(vector_base const & vec, viennacl::scalar & result) + template + void max_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1031,8 +1031,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void max_cpu(vector_base const & vec, T & result) + template + void max_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1075,8 +1075,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void min_impl(vector_base const & vec, viennacl::scalar & result) + template + void min_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1118,8 +1118,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void min_cpu(vector_base const & vec, T & result) + template + void min_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1162,8 +1162,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void sum_impl(vector_base const & vec, viennacl::scalar & result) + template + void sum_impl(vector_base const & vec, viennacl::scalar & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1205,8 +1205,8 @@ namespace viennacl * @param vec The vector * @param result The result scalar */ - template - void sum_cpu(vector_base const & vec, T & result) + template + void sum_cpu(vector_base const & vec, T & result) { switch (viennacl::traits::handle(vec).get_active_handle_id()) { @@ -1255,9 +1255,9 @@ namespace viennacl * @param alpha The first transformation coefficient (CPU scalar) * @param beta The second transformation coefficient (CPU scalar) */ - template - void plane_rotation(vector_base & vec1, - vector_base & vec2, + template + void plane_rotation(vector_base & vec1, + vector_base & vec2, T alpha, T beta) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) @@ -1293,9 +1293,9 @@ namespace viennacl * @param vec1 Input vector. * @param vec2 The output vector. */ - template - void inclusive_scan(vector_base & vec1, - vector_base & vec2) + template + void inclusive_scan(vector_base & vec1, + vector_base & vec2) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -1326,8 +1326,8 @@ namespace viennacl * Given an input element vector (x_0, x_1, ..., x_{n-1}), * this routine overwrites the vector with (x_0, x_0 + x_1, ..., x_0 + x_1 + ... + x_{n-1}) */ - template - void inclusive_scan(vector_base & vec) + template + void inclusive_scan(vector_base & vec) { inclusive_scan(vec, vec); } @@ -1343,9 +1343,9 @@ namespace viennacl * @param vec1 Input vector. * @param vec2 The output vector. 
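plane_rotation() above is the BLAS-style Givens rotation applied element-wise to the two vectors. A self-contained, sequential sketch of the arithmetic it dispatches to (names illustrative):

    #include <cstddef>
    #include <vector>

    template<typename T>
    void plane_rotation_sketch(std::vector<T> & x, std::vector<T> & y, T alpha, T beta)
    {
      for (std::size_t i = 0; i < x.size(); ++i)
      {
        T x_old = x[i];
        x[i] = alpha * x_old + beta * y[i];   // x_i <- alpha*x_i + beta*y_i
        y[i] = alpha * y[i]  - beta * x_old;  // y_i <- alpha*y_i - beta*x_i (using the original x_i)
      }
    }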
*/ - template - void exclusive_scan(vector_base & vec1, - vector_base & vec2) + template + void exclusive_scan(vector_base & vec1, + vector_base & vec2) { switch (viennacl::traits::handle(vec1).get_active_handle_id()) { @@ -1376,15 +1376,15 @@ namespace viennacl * Given an element vector (x_0, x_1, ..., x_{n-1}), * this routine overwrites the input vector with (0, x_0, x_0 + x_1, ..., x_0 + x_1 + ... + x_{n-2}) */ - template - void exclusive_scan(vector_base & vec) + template + void exclusive_scan(vector_base & vec) { exclusive_scan(vec, vec); } } //namespace linalg - template - vector_base & operator += (vector_base & v1, const vector_expression & proxy) + template + vector_base & operator += (vector_base & v1, const vector_expression & proxy) { assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); @@ -1394,8 +1394,8 @@ namespace viennacl return v1; } - template - vector_base & operator -= (vector_base & v1, const vector_expression & proxy) + template + vector_base & operator -= (vector_base & v1, const vector_expression & proxy) { assert( (viennacl::traits::size(proxy) == v1.size()) && bool("Incompatible vector sizes!")); assert( (v1.size() > 0) && bool("Vector not yet initialized!") ); diff --git a/viennacl/meta/result_of.hpp b/viennacl/meta/result_of.hpp index 32a0491a2f87ea4ccf59b8314611bc9afef3b329..67cb5384e615215efb87886af8c628ee28c81449 100644 --- a/viennacl/meta/result_of.hpp +++ b/viennacl/meta/result_of.hpp @@ -79,8 +79,8 @@ struct alignment enum { value = alignment::value }; }; -template -struct alignment< vector > +template +struct alignment< vector > { enum { value = AlignmentV }; }; @@ -142,8 +142,8 @@ struct size_type }; /** \cond */ -template -struct size_type< vector_base > +template +struct size_type< vector_base > { typedef SizeType type; }; @@ -339,14 +339,14 @@ struct cpu_value_type typedef double type; }; -template -struct cpu_value_type > +template +struct cpu_value_type > { typedef T type; }; -template -struct cpu_value_type > +template +struct cpu_value_type > { typedef T type; }; diff --git a/viennacl/ocl/context.hpp b/viennacl/ocl/context.hpp index b8b7f19f2af668fc79fd1fcef98a9bcce2e0c877..52f638f08b51fc3715808edf69776816dc28401e 100644 --- a/viennacl/ocl/context.hpp +++ b/viennacl/ocl/context.hpp @@ -31,8 +31,15 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include +#include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" #include "viennacl/ocl/handle.hpp" @@ -42,11 +49,102 @@ #include "viennacl/ocl/platform.hpp" #include "viennacl/ocl/command_queue.hpp" #include "viennacl/tools/sha1.hpp" +#include "viennacl/ocl/mempool/bitlog.hpp" #include "viennacl/tools/shared_ptr.hpp" +#include "viennacl/ocl/mempool/mempool.hpp" + + namespace viennacl { namespace ocl { + + // {{{ allocator class + + class cl_allocator_base + { + protected: + viennacl::ocl::context* m_context; + cl_mem_flags m_flags; + + public: + // CTOR + cl_allocator_base(viennacl::ocl::context* const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + { + std::cerr << "[Allocator]: cannot specify USE_HOST_PTR or " + "COPY_HOST_PTR flags" << std::endl; + throw viennacl::ocl::invalid_value(); + } + } + + + // Copy CTOR + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } + + + 
~cl_allocator_base() + { } + + void free(cl_mem p) + { + cl_int err = clReleaseMemObject(p); + VIENNACL_ERR_CHECK(err); +#ifdef VIENNACL_DEBUG_ALL + std :: cout << "[allocator]: deallocating memory: " << p << std::endl; +#endif + } + + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual cl_mem allocate(size_t) = 0; + }; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + viennacl::ocl::command_queue* m_queue; + + public: + // NOTE: Changed the declaration as viennacl comman=d queue does nt store + // the context + // + + cl_immediate_allocator(viennacl::ocl::context* const &ctx, + viennacl::ocl::command_queue* const &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : cl_allocator_base(ctx, flags), + m_queue(queue) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : cl_allocator_base(src), m_queue(src.m_queue) + { } + + cl_immediate_allocator *copy() const + { + return new cl_immediate_allocator(*this); + } + + inline cl_mem allocate(size_t s); + bool is_deferred() const + { return false; } + + virtual ~cl_immediate_allocator() + {} + }; + + + // }}} + + + // }}} + /** @brief Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc. * * This class was originally written before the OpenCL C++ bindings were standardized. @@ -84,6 +182,8 @@ public: /** @brief Sets the maximum number of devices to be set up for the context */ void default_device_num(vcl_size_t new_num) { default_device_num_ = new_num; } + /** Creating a memory pool */ + ////////// get and set preferred device type ///////////////////// /** @brief Returns the default device type for the context */ cl_device_type default_device_type() @@ -197,10 +297,23 @@ public: * @param flags OpenCL flags for the buffer creation * @param size Size of the memory buffer in bytes * @param ptr Optional pointer to CPU memory, with which the OpenCL memory should be initialized + * @param use_mempool Optional boolean to create memory through the memory pool. * @return A plain OpenCL handle. Either assign it to a viennacl::ocl::handle directly, or make sure that you free to memory manually if you no longer need the allocated memory. 
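For reference, a sketch of exercising the pool-backed allocation path (the create_memory_without_smart_handle() overload shown just below) directly. It assumes the context has already been set up with at least one device and queue, so that an allocator and a pool have been registered for it; the 1 KiB size is arbitrary.

#include "viennacl/ocl/backend.hpp"   // for viennacl::ocl::current_context()

void mempool_buffer_sketch()
{
  viennacl::ocl::context & ctx = viennacl::ocl::current_context();

  // request 1 KiB through the memory pool instead of a fresh clCreateBuffer():
  cl_mem buf = ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, 1024,
                                                      NULL, /* use_mempool = */ true);

  // pool-backed buffers are reference counted by the context's pool:
  ctx.increment_mem_ref_counter(buf, 1024);
  ctx.decrement_mem_ref_counter(buf, 1024);   // count reaches zero -> block returns to the pool
}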
*/ - cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL) const + cl_mem create_memory_without_smart_handle(cl_mem_flags flags, unsigned int size, void * ptr = NULL, bool use_mempool = false) const { + + if(use_mempool){ +#ifdef VIENNACL_DEBUG_ALL + std::cout << "[mempool]: querying for memory\n"; +#endif + cl_mem mem = get_mempool()->allocate(size); +#ifdef VIENNACL_DEBUG_ALL + std::cout << "[mempool]: gave memory at: " << mem << std::endl; + +#endif + return mem; + } #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Creating memory of size " << size << " for context " << h_ << " (unsafe, returning cl_mem directly)" << std::endl; #endif @@ -212,6 +325,18 @@ public: return mem; } + /** @brief Decerements the reference count of the memory in the memory pool **/ + void decrement_mem_ref_counter(cl_mem p, vcl_size_t s) const + { + get_mempool()->decrement_ref_counter(p, s); + } + + /** @brief Incerements the reference count of the memory in the memory pool **/ + void increment_mem_ref_counter(cl_mem p, vcl_size_t s) const + { + get_mempool()->increment_ref_counter(p, s); + } + /** @brief Creates a memory buffer within the context * @@ -251,6 +376,7 @@ public: /** @brief Adds a queue for the given device to the context */ void add_queue(cl_device_id dev) { + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_CONTEXT) std::cout << "ViennaCL: Adding new queue for device " << dev << " to context " << h_ << std::endl; #endif @@ -263,6 +389,18 @@ public: VIENNACL_ERR_CHECK(err); queues_[dev].push_back(viennacl::ocl::command_queue(temp)); + + // register the allocator for the device + if(allocators_.find(dev) == allocators_.end()) + { + // did not find an queue for the present device => allot one + allocators_[dev] = tools::shared_ptr(new + cl_immediate_allocator(this, + &(queues_[dev][0]), + CL_MEM_READ_WRITE)); + mempools_[dev] = tools::shared_ptr> (new + memory_pool(*allocators_[dev])); + } } /** @brief Adds a queue for the given device to the context */ @@ -279,6 +417,15 @@ public: return queues_[devices_[current_device_id_].id()][current_queue_id_]; } + //get current mempool + tools::shared_ptr> const& get_mempool() const + { + typedef std::map< cl_device_id, tools::shared_ptr> > MempoolContainer; + MempoolContainer::const_iterator it = mempools_.find(devices_[current_device_id_].id()); + assert (it != mempools_.end()&&bool("Did not find a memory pool.")); + return it->second; + } + viennacl::ocl::command_queue const & get_queue() const { typedef std::map< cl_device_id, std::vector > QueueContainer; @@ -765,6 +912,10 @@ private: std::string build_options_; vcl_size_t pf_index_; vcl_size_t current_queue_id_; + + // Memory pool + std::map< cl_device_id, tools::shared_ptr> allocators_; + std::map< cl_device_id, tools::shared_ptr>> mempools_; }; //context @@ -822,6 +973,53 @@ inline void viennacl::ocl::kernel::set_work_size_defaults() } } +// {{{ definition of cl_immediate_allocator::allocate + +cl_mem cl_immediate_allocator::allocate(size_t s) +{ + + cl_mem ptr = + m_context->create_memory_without_smart_handle(m_flags, s, NULL); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. 
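+  // Only std::min(s, sizeof(zero)) bytes are written below, so requests
+  // smaller than sizeof(unsigned) are handled correctly as well.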
+ unsigned zero = 0; + cl_int err = clEnqueueWriteBuffer( + m_queue->handle().get(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + ); + VIENNACL_ERR_CHECK(err); + + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. + + return ptr; +} + +// }}} + +// {{{ pooled handle dec + +void pooled_clmem_handle::inc() +{ + p_context_->increment_mem_ref_counter(h_, m_size); +} + + +void pooled_clmem_handle::dec() +{ + p_context_->decrement_mem_ref_counter(h_, m_size); +} + +// }}} + } } diff --git a/viennacl/ocl/forwards.h b/viennacl/ocl/forwards.h index a3603e913e84eaf27dd3a1e537ecc5c9d1b72487..faa0d30a0b29ee7a53ecf2de83e559009c37de81 100644 --- a/viennacl/ocl/forwards.h +++ b/viennacl/ocl/forwards.h @@ -40,16 +40,17 @@ namespace viennacl /** @brief A tag denoting the default OpenCL device type (SDK-specific) */ struct default_tag {}; - class kernel; class device; class command_queue; class context; class program; - template + template class handle; + class pooled_clmem_handle; + template void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue); diff --git a/viennacl/ocl/handle.hpp b/viennacl/ocl/handle.hpp index 8283f848cc0f9146b16e1e251176bb10e20a5904..e4e5f6ba0ddd4bb29427c34372ba7b3eccf0620c 100644 --- a/viennacl/ocl/handle.hpp +++ b/viennacl/ocl/handle.hpp @@ -33,6 +33,7 @@ #include #include "viennacl/ocl/forwards.h" #include "viennacl/ocl/error.hpp" +#include "viennacl/forwards.h" namespace viennacl { @@ -154,7 +155,7 @@ namespace viennacl handle() : h_(0), p_context_(NULL) {} handle(const OCL_TYPE & something, viennacl::ocl::context const & c) : h_(something), p_context_(&c) {} handle(const handle & other) : h_(other.h_), p_context_(other.p_context_) { if (h_ != 0) inc(); } - ~handle() { if (h_ != 0) dec(); } + virtual ~handle() { if (h_ != 0) dec(); } /** @brief Copies the OpenCL handle from the provided handle. Does not take ownership like e.g. std::auto_ptr<>, so both handle objects are valid (more like shared_ptr). */ handle & operator=(const handle & other) @@ -190,12 +191,12 @@ namespace viennacl const OCL_TYPE & get() const { return h_; } - viennacl::ocl::context const & context() const + virtual viennacl::ocl::context const & context() const { assert(p_context_ != NULL && bool("Logic error: Accessing dangling context from handle.")); return *p_context_; } - void context(viennacl::ocl::context const & c) { p_context_ = &c; } + virtual void context(viennacl::ocl::context const & c) { p_context_ = &c; } /** @brief Swaps the OpenCL handle of two handle objects */ @@ -208,19 +209,89 @@ namespace viennacl viennacl::ocl::context const * tmp2 = other.p_context_; other.p_context_ = this->p_context_; this->p_context_ = tmp2; - return *this; } /** @brief Manually increment the OpenCL reference count. Typically called automatically, but is necessary if user-supplied memory objects are wrapped. */ - void inc() { handle_inc_dec_helper::inc(h_); } + virtual void inc() { handle_inc_dec_helper::inc(h_); } /** @brief Manually decrement the OpenCL reference count. Typically called automatically, but might be useful with user-supplied memory objects. 
*/ - void dec() { handle_inc_dec_helper::dec(h_); } - private: + virtual void dec() { handle_inc_dec_helper::dec(h_); } + protected: OCL_TYPE h_; viennacl::ocl::context const * p_context_; }; + // {{{ pooled handle + // + + class pooled_clmem_handle: public handle + { + protected: + typedef handle super; + + public: + pooled_clmem_handle() : super(), m_size(0) {} + pooled_clmem_handle(const cl_mem & something, viennacl::ocl::context const & c, vcl_size_t & _s) : super(something, c), m_size(_s) + {if(h_!=0) + { + inc(); + cl_int err = clRetainMemObject(something); + VIENNACL_ERR_CHECK(err); + } + } + pooled_clmem_handle(const pooled_clmem_handle & other) : super(other), m_size(other.m_size) + { + if(h_!=0) + inc(); + } + + pooled_clmem_handle & operator=(const pooled_clmem_handle & other) + { + if (h_ != 0) + dec(); + h_ = other.h_; + p_context_ = other.p_context_; + m_size = other.m_size; + inc(); + return *this; + } + + pooled_clmem_handle & operator=(const cl_mem & something) + { + std::cerr << "[pooled_handle]: Pooled handle needs to know about size\n"; + throw std::exception(); + return *this; + } + + /** @brief Swaps the OpenCL handle of two handle objects */ + pooled_clmem_handle & swap(pooled_clmem_handle & other) + { + cl_mem tmp = other.h_; + other.h_ = this->h_; + this->h_ = tmp; + + viennacl::ocl::context const * tmp2 = other.p_context_; + other.p_context_ = this->p_context_; + this->p_context_ = tmp2; + + size_t tmp3 = other.m_size; + other.m_size = this->m_size; + this->m_size = tmp3; + + return *this; + } + + inline virtual void inc(); + inline virtual void dec(); + + virtual ~pooled_clmem_handle() { + if (h_!=0) dec(); + } + + private: + size_t m_size; + }; + } //namespace ocl } //namespace viennacl diff --git a/viennacl/ocl/kernel.hpp b/viennacl/ocl/kernel.hpp index 5f2cab139d87fa4410cf8ababe74822e952da8c4..900e6d80d2568502a2cb883663192b84ea10446d 100644 --- a/viennacl/ocl/kernel.hpp +++ b/viennacl/ocl/kernel.hpp @@ -223,6 +223,30 @@ namespace viennacl VIENNACL_ERR_CHECK(err); } + + //forward handles directly: + /** @brief Sets an OpenCL object at the provided position */ + template + void arg(unsigned int pos, viennacl::ocl::handle const & h) + { + CL_TYPE temp = h.get(); + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) + std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; + #endif + cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp); + VIENNACL_ERR_CHECK(err); + } + + void arg(unsigned int pos, viennacl::ocl::pooled_clmem_handle const & h) + { + cl_mem temp = h.get(); + #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) + std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; + #endif + cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(cl_mem), (void*)&temp); + VIENNACL_ERR_CHECK(err); + } + //generic handling: call .handle() member /** @brief Sets an OpenCL memory object at the provided position */ template @@ -238,19 +262,6 @@ namespace viennacl VIENNACL_ERR_CHECK(err); } - //forward handles directly: - /** @brief Sets an OpenCL object at the provided position */ - template - void arg(unsigned int pos, viennacl::ocl::handle const & h) - { - CL_TYPE temp = h.get(); - #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL) - std::cout << "ViennaCL: Setting handle kernel argument " << temp << " at pos " << pos << " for kernel " << name_ << std::endl; - 
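As an illustration of how the pooled handle interacts with the kernel::arg() overload added above, a sketch only: the program/kernel names, the buffer size, and the prior add_program()/build step are assumptions.

void pooled_kernel_arg_sketch()
{
  viennacl::ocl::context & ctx = viennacl::ocl::current_context();
  // "my_program" is assumed to have been added to the context beforehand:
  viennacl::ocl::kernel & k = ctx.get_program("my_program").get_kernel("my_kernel");

  vcl_size_t bytes = 256 * sizeof(float);
  cl_mem raw = ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE,
                                                      static_cast<unsigned int>(bytes),
                                                      NULL, true /* use_mempool */);

  // the wrapper's inc()/dec() keep the pool's reference counter in sync:
  viennacl::ocl::pooled_clmem_handle buf(raw, ctx, bytes);

  k.arg(0, buf);                 // picks the pooled_clmem_handle overload
  viennacl::ocl::enqueue(k);     // assuming the kernel takes exactly this one argument
}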
#endif - cl_int err = clSetKernelArg(handle_.get(), pos, sizeof(CL_TYPE), (void*)&temp); - VIENNACL_ERR_CHECK(err); - } - //local buffer argument: /** @brief Sets an OpenCL local memory object at the provided position */ diff --git a/viennacl/ocl/mempool/bitlog.hpp b/viennacl/ocl/mempool/bitlog.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5a0ba633d455e2277a3ebbec4d1afd504c3b90d4 --- /dev/null +++ b/viennacl/ocl/mempool/bitlog.hpp @@ -0,0 +1,94 @@ +// Base-2 logarithm bithack. +// +// Copyright (C) 2009 Andreas Kloeckner +// Copyright (C) Sean Eron Anderson (in the public domain) +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP + + +#include +#include + +namespace viennacl +{ +namespace mempool +{ + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + + + const char log_table_8[] = + { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + inline unsigned bitlog2_16(uint16_t v) + { + if (unsigned long t = v >> 8) + return 8+log_table_8[t]; + else + return log_table_8[v]; + } + + inline unsigned bitlog2_32(uint32_t v) + { + if (uint16_t t = v >> 16) + return 16+bitlog2_16(t); + else + return bitlog2_16(v); + } + + inline unsigned bitlog2(unsigned long v) + { +#if (ULONG_MAX != 4294967295) + if (uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else +#endif + return bitlog2_32(v); + } +} // namespace mempool +} // namespace viennacl + + + + + +#endif diff --git a/viennacl/ocl/mempool/mempool.hpp b/viennacl/ocl/mempool/mempool.hpp new file mode 100644 index 0000000000000000000000000000000000000000..12abdc62a56a0822e6bbf6a56d6b0023cdd28997 --- /dev/null +++ 
b/viennacl/ocl/mempool/mempool.hpp @@ -0,0 +1,357 @@ +// Abstract memory pool implementation +// +// Copyright (C) 2009-17 Andreas Kloeckner +// 2018-19 Kaushik Kulkarni +// +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + +#ifndef VIENNACL_OCL_MEMPOOL_HPP_ +#define VIENNACL_OCL_MEMPOOL_HPP_ + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include "viennacl/ocl/mempool/bitlog.hpp" +#include "viennacl/ocl/mempool/mempool_utils.hpp" +#include "viennacl/ocl/error.hpp" + +namespace viennacl +{ +namespace ocl +{ + + template + class memory_pool : mempool::noncopyable + { + public: + typedef size_t size_type; + + private: + typedef uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef std::map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + std::map m_reference_count; // ref counter + std::unique_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + std::cerr << "Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. 
an allocator whose allocations can turn out to " + "be unavailable long after allocation.\n"; + throw std::exception(); + } + } + + virtual ~memory_pool() + { + free_held(); + } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = viennacl::mempool::bitlog2(size); + size_type shifted = viennacl::mempool::signed_right_shift(size, + l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = viennacl::mempool::signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = viennacl::mempool::signed_left_shift( + (1<second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + cl_mem allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + + cl_mem result = pop_block_from_bin(bin, size); + assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); + m_reference_count[result] = 0; + cl_int err = clRetainMemObject(result); + VIENNACL_ERR_CHECK(err); + + return result; + } + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { + cl_mem result = get_from_allocator(alloc_sz); + + assert(m_reference_count.find(result) == m_reference_count.end() && bool("Memory already registered in reference counter.")); + + cl_int err = clRetainMemObject(result); + VIENNACL_ERR_CHECK(err); + m_reference_count[result] = 0; + + return result; + } + catch (viennacl::ocl::mem_object_allocation_failure &e) + { + throw; + } + + throw viennacl::ocl::mem_object_allocation_failure(); + } + + void free(cl_mem p, size_type size) + { +#ifdef VIENNACL_DEBUG_ALL + std::cout << "[mempool]: freeing the memory " << + p << ". 
So that it could be used again."<< std::endl; +#endif + --m_active_blocks; + bin_nr_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + for (bin_pair_t &bin_pair: m_container) + { + bin_t &bin = bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + // free largest stuff first + for (bin_pair_t &bin_pair: viennacl::mempool::reverse(m_container)) + { + bin_t &bin = bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + void increment_ref_counter(cl_mem p, size_type s) + { +#ifdef VIENNACL_DEBUG_ALL + std::cout << "[mempool]: Incrementing for " << p << std::endl; +#endif + if(m_reference_count.find(p) == m_reference_count.end()) + { + std::cerr << "Did not find a memory to reference count.\n"; + throw std::exception(); + } + + ++m_reference_count[p]; + } + + void decrement_ref_counter(cl_mem p, size_type s) + { +#ifdef VIENNACL_DEBUG_ALL + std::cout << "[mempool]: Decrementing for " << p << std::endl; +#endif + if(m_reference_count.find(p) == m_reference_count.end()) + { + std::cerr << "Did not find a memory to reference count.\n"; + throw std::exception(); + } + + --m_reference_count[p]; + + if(m_reference_count[p] == 0) + { + // this is not longer useful => free it + free(p, s); + + // no longer need to store this in the map + m_reference_count.erase(p); + } + } + + private: + cl_mem get_from_allocator(size_type alloc_sz) + { + cl_mem result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + cl_mem pop_block_from_bin(bin_t &bin, size_type size) + { + cl_mem result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; +} +} + +#endif diff --git a/viennacl/ocl/mempool/mempool_utils.hpp b/viennacl/ocl/mempool/mempool_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..de04ee314aa1055ecee45faf7a41292d704f5b6c --- /dev/null +++ b/viennacl/ocl/mempool/mempool_utils.hpp @@ -0,0 +1,137 @@ +// Various odds and ends +// +// Copyright (C) 2009 Andreas Kloeckner +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
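To make the pool's bookkeeping concrete, a small sketch of driving memory_pool directly (it is normally owned by the context). The allocator type as template argument is inferred from the constructor shown above; ctx and queue are assumed to be an initialized context and one of its queues.

#include <cassert>
#include "viennacl/ocl/mempool/mempool.hpp"

void pool_sketch(viennacl::ocl::context & ctx, viennacl::ocl::command_queue & queue)
{
  viennacl::ocl::cl_immediate_allocator alloc(&ctx, &queue, CL_MEM_READ_WRITE);
  viennacl::ocl::memory_pool<viennacl::ocl::cl_immediate_allocator> pool(alloc);

  cl_mem buf = pool.allocate(1000);        // served from a bin-rounded 1023-byte block
  pool.increment_ref_counter(buf, 1000);
  pool.decrement_ref_counter(buf, 1000);   // count hits zero -> free(): block goes back into its bin

  assert(pool.active_blocks() == 0 && pool.held_blocks() == 1);
  pool.free_held();                        // hand held blocks back to the allocator
}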
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + + +#ifndef VIENNACL_MEMPOOL_UTILS_HPP +#define VIENNACL_MEMPOOL_UTILS_HPP + +#include +#include + +namespace viennacl +{ +namespace mempool +{ + + // {{{ error + class error : public std::runtime_error + { + private: + std::string m_routine; + cl_int m_code; + + // This is here because clLinkProgram returns a program + // object *just* so that there is somewhere for it to + // stuff the linker logs. :/ + bool m_program_initialized; + cl_program m_program; + + public: + error(const char *routine, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c), + m_program_initialized(false), m_program(nullptr) + { } + + error(const char *routine, cl_program prg, cl_int c, + const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c), + m_program_initialized(true), m_program(prg) + { } + + virtual ~error() + { + if (m_program_initialized) + clReleaseProgram(m_program); + } + + const std::string &routine() const + { + return m_routine; + } + + cl_int code() const + { + return m_code; + } + + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + }; + + // }}} + + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + // https://stackoverflow.com/a/28139075 + template + struct reversion_wrapper { T& iterable; }; + + template + auto begin (reversion_wrapper w) { return w.iterable.rbegin(); } + + template + auto end (reversion_wrapper w) { return w.iterable.rend(); } + + template + reversion_wrapper reverse (T&& iterable) { return { iterable }; } + + + // https://stackoverflow.com/a/44175911 + class noncopyable { + public: + noncopyable() = default; + ~noncopyable() = default; + + private: + noncopyable(const noncopyable&) = delete; + noncopyable& operator=(const noncopyable&) = delete; + }; + +} +} + +#endif + +// vim:foldmethod=marker diff --git a/viennacl/scalar.hpp b/viennacl/scalar.hpp index 4eaf7522a1ad9f5b04977fc2e053ab8e2b2c8c6d..9b08ca489aa931b0814555e3b1fc488bb0c5a5b7 100644 --- a/viennacl/scalar.hpp +++ b/viennacl/scalar.hpp @@ -343,12 +343,12 @@ private: * * @tparam NumericT Either float or double. Checked at compile time. */ -template +template class scalar { - typedef scalar self_type; + typedef scalar self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; typedef vcl_size_t size_type; /** @brief Returns the underlying host scalar type. */ @@ -411,7 +411,7 @@ public: } /** @brief Assigns a vector entry. 
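A short sketch of the entry-proxy assignment documented just below; the float element type and the values are illustrative.

viennacl::vector<float> v = viennacl::scalar_vector<float>(10, 2.0f);
viennacl::scalar<float>  s = 0.0f;

s = v(3);            // copies the fourth vector entry into the device scalar via entry_proxy
float host_val = s;  // read the result back to the host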
*/ - self_type & operator= (entry_proxy const & other) + self_type & operator= (entry_proxy const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, other.index() * sizeof(NumericT), 0, sizeof(NumericT)); @@ -419,7 +419,7 @@ public: } /** @brief Assigns the value from another scalar. */ - self_type & operator= (scalar const & other) + self_type & operator= (scalar const & other) { init_if_necessary(viennacl::traits::context(other)); viennacl::backend::memory_copy(other.handle(), val_, 0, 0, sizeof(NumericT)); diff --git a/viennacl/sliced_ell_matrix.hpp b/viennacl/sliced_ell_matrix.hpp index f66b0d4d6807ad821c879d79be3a533f9d9a8361..3c7ab74e050daba04e9889c7de69eb2a09c99732 100644 --- a/viennacl/sliced_ell_matrix.hpp +++ b/viennacl/sliced_ell_matrix.hpp @@ -46,7 +46,7 @@ template class sliced_ell_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; typedef vcl_size_t size_type; diff --git a/viennacl/toeplitz_matrix.hpp b/viennacl/toeplitz_matrix.hpp index 1891a6aa24e0dd8573556d51f818ed4b0e15e3b7..00b998ec9856d052b7acd3754e2776cc8a4c1fcb 100644 --- a/viennacl/toeplitz_matrix.hpp +++ b/viennacl/toeplitz_matrix.hpp @@ -43,7 +43,7 @@ template class toeplitz_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/tools/entry_proxy.hpp b/viennacl/tools/entry_proxy.hpp index 64114eb2f152d452f9034141527618f0ab409dd0..7b055794e05f2edf28e511ccd5e9903cddcc18b4 100644 --- a/viennacl/tools/entry_proxy.hpp +++ b/viennacl/tools/entry_proxy.hpp @@ -37,11 +37,11 @@ namespace viennacl * * @tparam NumericT Either float or double */ -template +template class entry_proxy { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. * @@ -159,7 +159,7 @@ private: } vcl_size_t index_; - viennacl::backend::mem_handle & mem_handle_; + viennacl::backend::mem_handle & mem_handle_; }; //entry_proxy @@ -175,12 +175,12 @@ private: * * @tparam NumericT Either float or double */ -template +template class const_entry_proxy { - typedef const_entry_proxy self_type; + typedef const_entry_proxy self_type; public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; /** @brief The constructor for the proxy class. Declared explicit to avoid any surprises created by the compiler. 
* @@ -226,7 +226,7 @@ private: } vcl_size_t index_; - viennacl::backend::mem_handle const & mem_handle_; + handle_type const & mem_handle_; }; //entry_proxy } diff --git a/viennacl/traits/context.hpp b/viennacl/traits/context.hpp index c84ab99c553a99a2b3fb5581949dc4871858f928..0613ee37b4552bbdc399d8e1b4cab19e217f7d79 100644 --- a/viennacl/traits/context.hpp +++ b/viennacl/traits/context.hpp @@ -48,7 +48,8 @@ viennacl::context context(T const & t) } /** @brief Returns an ID for the currently active memory domain of an object */ -inline viennacl::context context(viennacl::backend::mem_handle const & h) +template > +inline viennacl::context context(viennacl::backend::mem_handle const & h) { #ifdef VIENNACL_WITH_OPENCL if (h.get_active_handle_id() == OPENCL_MEMORY) diff --git a/viennacl/traits/handle.hpp b/viennacl/traits/handle.hpp index 7a2af9a864933bf7098b49f4b1c57e48140bcfc6..e1d69f3df77ad8278324ba7f982439f22ca6d541 100644 --- a/viennacl/traits/handle.hpp +++ b/viennacl/traits/handle.hpp @@ -38,14 +38,14 @@ namespace traits // /** @brief Returns the generic memory handle of an object. Non-const version. */ template -viennacl::backend::mem_handle & handle(T & obj) +typename T::handle_type & handle(T & obj) { return obj.handle(); } /** @brief Returns the generic memory handle of an object. Const-version. */ template -viennacl::backend::mem_handle const & handle(T const & obj) +typename T::handle_type const & handle(T const & obj) { return obj.handle(); } @@ -58,96 +58,96 @@ inline long handle(long val) { return val; } //for unification purposes whe inline float handle(float val) { return val; } //for unification purposes when passing CPU-scalars to kernels inline double handle(double val) { return val; } //for unification purposes when passing CPU-scalars to kernels -template -viennacl::backend::mem_handle & handle(viennacl::scalar_expression< const LHS, const RHS, OP> & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::scalar_expression< const LHS, const RHS, OP> & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj); +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj); -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj); +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj); -template -viennacl::backend::mem_handle const & handle(viennacl::scalar_expression< const LHS, const RHS, OP> const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::scalar_expression< const LHS, const RHS, OP> const & obj) { return handle(obj.lhs()); } // proxy objects require extra care (at the moment) -template -viennacl::backend::mem_handle & handle(viennacl::vector_base & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::vector_base & obj) { return obj.handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_base const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_base const & obj) { return obj.handle(); } -template -viennacl::backend::mem_handle & handle(viennacl::matrix_range & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::matrix_range & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_range const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_range const & obj) { return 
obj.get().handle(); } -template -viennacl::backend::mem_handle & handle(viennacl::matrix_slice & obj) +template> +viennacl::backend::mem_handle & handle(viennacl::matrix_slice & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_slice const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_slice const & obj) { return obj.get().handle(); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::vector_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) { return handle(obj.lhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) { return handle(obj.rhs()); } -template -viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) +template> +viennacl::backend::mem_handle const & handle(viennacl::matrix_expression const & obj) { return handle(obj.rhs()); } @@ -158,36 +158,50 @@ viennacl::backend::mem_handle const & handle(viennacl::matrix_expression -typename viennacl::backend::mem_handle::ram_handle_type & ram_handle(T & obj) +template> +typename viennacl::backend::mem_handle::ram_handle_type & ram_handle(T & obj) { return viennacl::traits::handle(obj).ram_handle(); } /** @brief Generic helper routine for extracting the RAM handle of a ViennaCL object. Const version. */ -template -typename viennacl::backend::mem_handle::ram_handle_type const & ram_handle(T const & obj) +template> +typename viennacl::backend::mem_handle::ram_handle_type const & ram_handle(T const & obj) { return viennacl::traits::handle(obj).ram_handle(); } /** \cond */ -inline viennacl::backend::mem_handle::ram_handle_type & ram_handle(viennacl::backend::mem_handle & h) +template +inline viennacl::tools::shared_ptr & ram_handle(viennacl::backend::mem_handle & h) { return h.ram_handle(); } -inline viennacl::backend::mem_handle::ram_handle_type const & ram_handle(viennacl::backend::mem_handle const & h) +template > +inline viennacl::tools::shared_ptr const & ram_handle(viennacl::backend::mem_handle const & h) { return h.ram_handle(); } -/** \endcond */ +/** \endcond */ // // OpenCL handle extraction // #ifdef VIENNACL_WITH_OPENCL /** @brief Generic helper routine for extracting the OpenCL handle of a ViennaCL object. Non-const version. 
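The vector-specific overloads added just below expose a vector's buffer as a pooled handle. A sketch, assuming the OpenCL backend is enabled and that vector_base's OpenCL handle type is indeed pooled_clmem_handle (the template defaults are elided in this listing):

viennacl::vector<float> v(32);

viennacl::ocl::pooled_clmem_handle const & ph = viennacl::traits::opencl_handle(v);
cl_mem raw = ph.get();   // plain cl_mem, e.g. for handing to external OpenCL code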
*/ +template +viennacl::ocl::pooled_clmem_handle & opencl_handle(viennacl::vector_base & obj) +{ + return viennacl::traits::handle(obj).opencl_handle(); +} + +template +viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_base const & obj) +{ + return viennacl::traits::handle(obj).opencl_handle(); +} + template viennacl::ocl::handle & opencl_handle(T & obj) { @@ -220,6 +234,12 @@ viennacl::ocl::handle const & opencl_handle(viennacl::vector_expression< return viennacl::traits::handle(obj.rhs()).opencl_handle(); } +template +viennacl::ocl::pooled_clmem_handle const & opencl_handle(viennacl::vector_expression, op_prod> const & obj) +{ + return viennacl::traits::handle(obj.rhs()).opencl_handle(); +} + template viennacl::ocl::context & opencl_context(T const & obj) { diff --git a/viennacl/traits/size.hpp b/viennacl/traits/size.hpp index 2e2e0d763f304c7b31e71dbac866379c0252f60f..3f3e1769b518458f8bc5f30dba809fe6fe97ac67 100644 --- a/viennacl/traits/size.hpp +++ b/viennacl/traits/size.hpp @@ -384,8 +384,8 @@ vcl_size_t size(vector_expression -vcl_size_t internal_size(vector_base const & vec) +template +vcl_size_t internal_size(vector_base const & vec) { return vec.internal_size(); } diff --git a/viennacl/traits/start.hpp b/viennacl/traits/start.hpp index c81a3b35d28c772a8368bea557be677e91d6ea94..a01581e2c45565a206a4a4c9247b7a815563ec3d 100644 --- a/viennacl/traits/start.hpp +++ b/viennacl/traits/start.hpp @@ -47,9 +47,9 @@ start(T const & obj) } //ViennaCL vector leads to start index 0: -template -typename result_of::size_type >::type -start(viennacl::vector const &) +template +typename result_of::size_type >::type +start(viennacl::vector const &) { return 0; } diff --git a/viennacl/traits/stride.hpp b/viennacl/traits/stride.hpp index 68c46814ae316fa222fc00bc167e9a9eee191c2b..dcc79268412fbea1a89d9e828dd2b1daa07efc1e 100644 --- a/viennacl/traits/stride.hpp +++ b/viennacl/traits/stride.hpp @@ -40,9 +40,9 @@ namespace traits // // inc: Increment for vectors. Defaults to 1 // -template -typename result_of::size_type< viennacl::vector_base >::type -stride(viennacl::vector_base const & s) { return s.stride(); } +template +typename result_of::size_type< viennacl::vector_base >::type +stride(viennacl::vector_base const & s) { return s.stride(); } // // inc1: Row increment for matrices. 
Defaults to 1 diff --git a/viennacl/vandermonde_matrix.hpp b/viennacl/vandermonde_matrix.hpp index d3f3a66c2abf542ddec2ec513c8681856068ba33..6a9eff153ff984e11662c2d629ecd6a146e28662 100644 --- a/viennacl/vandermonde_matrix.hpp +++ b/viennacl/vandermonde_matrix.hpp @@ -44,7 +44,7 @@ template class vandermonde_matrix { public: - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle<> handle_type; typedef scalar::ResultType> value_type; /** diff --git a/viennacl/vector.hpp b/viennacl/vector.hpp index 69bf3d4c38de643a235613628b09b9a28eac62bc..c735044f812aec747a586c59e613422c50a0d5d9 100644 --- a/viennacl/vector.hpp +++ b/viennacl/vector.hpp @@ -104,15 +104,15 @@ private: * @tparam NumericT The underlying floating point type (either float or double) * @tparam AlignmentV Alignment of the underlying vector, @see vector */ -template +template class const_vector_iterator { - typedef const_vector_iterator self_type; + typedef const_vector_iterator self_type; public: typedef scalar value_type; typedef vcl_size_t size_type; typedef vcl_ptrdiff_t difference_type; - typedef viennacl::backend::mem_handle handle_type; + typedef viennacl::backend::mem_handle handle_type; //const_vector_iterator() {} @@ -122,7 +122,7 @@ public: * @param start First index of the element in the vector pointed to be the iterator (for vector_range and vector_slice) * @param stride Stride for the support of vector_slice */ - const_vector_iterator(vector_base const & vec, + const_vector_iterator(vector_base const & vec, size_type index, size_type start = 0, size_type stride = 1) : elements_(vec.handle()), index_(index), start_(start), stride_(stride) {} @@ -142,7 +142,7 @@ public: value_type operator*(void) const { value_type result; - result = const_entry_proxy(start_ + index_ * stride(), elements_); + result = const_entry_proxy(start_ + index_ * stride(), elements_); return result; } self_type operator++(void) { ++index_; return *this; } @@ -201,11 +201,11 @@ protected: * @tparam NumericT The underlying floating point type (either float or double) * @tparam AlignmentV Alignment of the underlying vector, @see vector */ -template -class vector_iterator : public const_vector_iterator +template +class vector_iterator : public const_vector_iterator { - typedef const_vector_iterator base_type; - typedef vector_iterator self_type; + typedef const_vector_iterator base_type; + typedef vector_iterator self_type; public: typedef typename base_type::handle_type handle_type; typedef typename base_type::size_type size_type; @@ -221,15 +221,15 @@ public: * @param start Offset from the beginning of the underlying vector (for ranges and slices) * @param stride Stride for slices */ - vector_iterator(vector_base & vec, + vector_iterator(vector_base & vec, size_type index, size_type start = 0, size_type stride = 1) : base_type(vec, index, start, stride), elements_(vec.handle()) {} //vector_iterator(base_type const & b) : base_type(b) {} - entry_proxy operator*(void) + entry_proxy operator*(void) { - return entry_proxy(base_type::start_ + base_type::index_ * base_type::stride(), elements_); + return entry_proxy(base_type::start_ + base_type::index_ * base_type::stride(), elements_); } difference_type operator-(self_type const & other) const { difference_type result = base_type::index_; return (result - static_cast(other.index_)); } @@ -247,136 +247,138 @@ private: }; -template -vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a 
vector might have been created globally before the backend has become available */ } +template +vector_base::vector_base() : size_(0), start_(0), stride_(1), internal_size_(0) { /* Note: One must not call ::init() here because a vector might have been created globally before the backend has become available */ } -template -vector_base::vector_base(viennacl::backend::mem_handle & h, - size_type vec_size, size_type vec_start, size_type vec_stride) - : size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} +template +vector_base::vector_base(viennacl::backend::mem_handle & h, + size_type vec_size, size_type vec_start, size_type vec_stride) +: size_(vec_size), start_(vec_start), stride_(vec_stride), internal_size_(vec_size), elements_(h) {} -template -vector_base::vector_base(size_type vec_size, viennacl::context ctx) - : size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) +template +vector_base::vector_base(size_type vec_size, viennacl::context ctx) +: size_(vec_size), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx); - clear(); - } +if (size_ > 0) +{ + // [kk:] this is the constructor that we are concerned about + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), ctx, NULL); + clear(); } +} + // CUDA or host memory: -template -vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) - : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +template +vector_base::vector_base(NumericT * ptr_to_mem, viennacl::memory_types mem_type, size_type vec_size, vcl_size_t start, size_type stride) +: size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +{ +if (mem_type == viennacl::CUDA_MEMORY) { - if (mem_type == viennacl::CUDA_MEMORY) - { #ifdef VIENNACL_WITH_CUDA - elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); - elements_.cuda_handle().reset(reinterpret_cast(ptr_to_mem)); - elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. + elements_.switch_active_handle_id(viennacl::CUDA_MEMORY); + elements_.cuda_handle().reset(reinterpret_cast(ptr_to_mem)); + elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. #else - throw cuda_not_available_exception(); + throw cuda_not_available_exception(); #endif - } - else if (mem_type == viennacl::MAIN_MEMORY) - { - elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); - elements_.ram_handle().reset(reinterpret_cast(ptr_to_mem)); - elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. - } +} +else if (mem_type == viennacl::MAIN_MEMORY) +{ + elements_.switch_active_handle_id(viennacl::MAIN_MEMORY); + elements_.ram_handle().reset(reinterpret_cast(ptr_to_mem)); + elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. 
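The constructor around this point lets a user wrap memory they already own; the corresponding user-level sketch (C++11 std::vector::data() and the main-memory backend are assumptions):

std::vector<float> host(64, 1.0f);

// wrap the existing host buffer: no copy is made, and the inc() above prevents the
// user-owned memory from being freed when the ViennaCL object is destroyed
viennacl::vector<float> v(host.data(), viennacl::MAIN_MEMORY, host.size());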
+} - elements_.raw_size(sizeof(NumericT) * vec_size); +elements_.raw_size(sizeof(NumericT) * vec_size); } #ifdef VIENNACL_WITH_OPENCL -template -vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type stride, viennacl::context ctx) - : size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) +template +vector_base::vector_base(cl_mem existing_mem, size_type vec_size, size_type start, size_type stride, viennacl::context ctx) +: size_(vec_size), start_(start), stride_(stride), internal_size_(vec_size) { - elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - elements_.opencl_handle() = existing_mem; - elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. - elements_.opencl_handle().context(ctx.opencl_context()); - elements_.raw_size(sizeof(NumericT) * vec_size); +elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); +elements_.opencl_handle() = existing_mem; +elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed. +elements_.opencl_handle().context(ctx.opencl_context()); +elements_.raw_size(sizeof(NumericT) * vec_size); } #endif -template +template template -vector_base::vector_base(vector_expression const & proxy) - : size_(viennacl::traits::size(proxy)), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) +vector_base::vector_base(vector_expression const & proxy) +: size_(viennacl::traits::size(proxy)), start_(0), stride_(1), internal_size_(viennacl::tools::align_to_multiple(size_, dense_padding_size)) { - if (size_ > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); - clear(); - } - self_type::operator=(proxy); +if (size_ > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); + clear(); +} +self_type::operator=(proxy); } // Copy CTOR: -template -vector_base::vector_base(const vector_base & other) : - size_(other.size_), start_(0), stride_(1), - internal_size_(viennacl::tools::align_to_multiple(other.size_, dense_padding_size)) +template +vector_base::vector_base(const vector_base & other) : +size_(other.size_), start_(0), stride_(1), +internal_size_(viennacl::tools::align_to_multiple(other.size_, dense_padding_size)) { - elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); - if (internal_size() > 0) - { - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); - clear(); - self_type::operator=(other); - } +elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); +if (internal_size() > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); + clear(); + self_type::operator=(other); +} } // Conversion CTOR: -template +template template -vector_base::vector_base(const vector_base & other) : - size_(other.size()), start_(0), stride_(1), - internal_size_(viennacl::tools::align_to_multiple(other.size(), dense_padding_size)) +vector_base::vector_base(const vector_base & other) : +size_(other.size()), start_(0), stride_(1), +internal_size_(viennacl::tools::align_to_multiple(other.size(), dense_padding_size)) { - elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); - if (internal_size() > 0) - { - 
viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); - clear(); - self_type::operator=(other); - } +elements_.switch_active_handle_id(viennacl::traits::active_handle_id(other)); +if (internal_size() > 0) +{ + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(other)); + clear(); + self_type::operator=(other); +} } -template -vector_base & vector_base::operator=(const self_type & vec) +template +vector_base & vector_base::operator=(const self_type & vec) { - assert( ( (vec.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (vec.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (&vec==this) - return *this; +if (&vec==this) + return *this; - if (vec.size() > 0) +if (vec.size() > 0) +{ + if (size_ == 0) { - if (size_ == 0) - { - size_ = vec.size(); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - elements_.switch_active_handle_id(vec.handle().get_active_handle_id()); - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(vec)); - pad(); - } - - viennacl::linalg::av(*this, - vec, cpu_value_type(1.0), 1, false, false); + size_ = vec.size(); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + elements_.switch_active_handle_id(vec.handle().get_active_handle_id()); + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(vec)); + pad(); } - return *this; + viennacl::linalg::av(*this, + vec, cpu_value_type(1.0), 1, false, false); +} + +return *this; } @@ -384,122 +386,122 @@ vector_base & vector_base +template template -vector_base & vector_base::operator=(const vector_expression & proxy) +vector_base & vector_base::operator=(const vector_expression & proxy) { - assert( ( (viennacl::traits::size(proxy) == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (viennacl::traits::size(proxy) == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - // initialize the necessary buffer - if (size() == 0) - { - size_ = viennacl::traits::size(proxy); - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); - pad(); - } +// initialize the necessary buffer +if (size() == 0) +{ + size_ = viennacl::traits::size(proxy); + internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); + viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(proxy)); + pad(); +} - linalg::detail::op_executor >::apply(*this, proxy); +linalg::detail::op_executor >::apply(*this, proxy); - return *this; +return *this; } // convert from vector with other numeric type -template +template template -vector_base & vector_base:: operator = (const vector_base & v1) +vector_base & vector_base:: operator = (const vector_base & v1) { - assert( ( (v1.size() == size()) || (size() == 0) ) - && bool("Incompatible vector sizes!")); +assert( ( (v1.size() == size()) || (size() == 0) ) + && bool("Incompatible vector sizes!")); - if (size() == 0) +if (size() == 0) +{ + size_ = v1.size(); + if (size_ > 0) { - size_ = v1.size(); - if (size_ > 0) - { - internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size); - 
- viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(v1));
- pad();
- }
+ internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
+ viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), viennacl::traits::context(v1));
+ pad();
 }
+}
- viennacl::linalg::convert(*this, v1);
+viennacl::linalg::convert(*this, v1);
- return *this;
+return *this;
 }
 /** @brief Creates the vector from the supplied unit vector. */
-template
-vector_base & vector_base::operator = (unit_vector const & v)
+template
+vector_base & vector_base::operator = (unit_vector const & v)
 {
- assert( ( (v.size() == size()) || (size() == 0) )
- && bool("Incompatible vector sizes!"));
+assert( ( (v.size() == size()) || (size() == 0) )
+ && bool("Incompatible vector sizes!"));
- if (size() == 0)
+if (size() == 0)
+{
+ size_ = v.size();
+ internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
+ if (size_ > 0)
 {
- size_ = v.size();
- internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
- if (size_ > 0)
- {
- viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
- clear();
- }
+ viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
+ clear();
 }
- else
- viennacl::linalg::vector_assign(*this, NumericT(0));
+}
+else
+ viennacl::linalg::vector_assign(*this, NumericT(0));
- if (size_ > 0)
- this->operator()(v.index()) = NumericT(1);
+if (size_ > 0)
+ this->operator()(v.index()) = NumericT(1);
- return *this;
+return *this;
 }
 /** @brief Creates the vector from the supplied zero vector. */
-template
-vector_base & vector_base::operator = (zero_vector const & v)
+template
+vector_base & vector_base::operator = (zero_vector const & v)
 {
- assert( ( (v.size() == size()) || (size() == 0) )
- && bool("Incompatible vector sizes!"));
+assert( ( (v.size() == size()) || (size() == 0) )
+ && bool("Incompatible vector sizes!"));
- if (size() == 0)
+if (size() == 0)
+{
+ size_ = v.size();
+ internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
+ if (size_ > 0)
 {
- size_ = v.size();
- internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
- if (size_ > 0)
- {
- viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
- clear();
- }
+ viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
+ clear();
 }
- else
- viennacl::linalg::vector_assign(*this, NumericT(0));
+}
+else
+ viennacl::linalg::vector_assign(*this, NumericT(0));
- return *this;
+return *this;
 }
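For reference, the unit_vector/zero_vector assignments in these hunks (and the scalar_vector one that follows) are typically used as in this hedged sketch; sizes and values are illustrative:

```cpp
#include "viennacl/vector.hpp"

void implicit_vector_assignments()
{
  viennacl::vector<float> x;                     // empty; sized by the first assignment
  x = viennacl::unit_vector<float>(10, 3);       // e_3: zeros everywhere except x[3] == 1
  x = viennacl::zero_vector<float>(10);          // all entries zero
  x = viennacl::scalar_vector<float>(10, 2.5f);  // every entry set to 2.5
}
```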
 /** @brief Creates the vector from the supplied scalar vector. */
-template
-vector_base & vector_base::operator = (scalar_vector const & v)
+template
+vector_base & vector_base::operator = (scalar_vector const & v)
 {
- assert( ( (v.size() == size()) || (size() == 0) )
- && bool("Incompatible vector sizes!"));
+assert( ( (v.size() == size()) || (size() == 0) )
+ && bool("Incompatible vector sizes!"));
- if (size() == 0)
+if (size() == 0)
+{
+ size_ = v.size();
+ internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
+ if (size_ > 0)
 {
- size_ = v.size();
- internal_size_ = viennacl::tools::align_to_multiple(size_, dense_padding_size);
- if (size_ > 0)
- {
- viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
- pad();
- }
+ viennacl::backend::memory_create(elements_, sizeof(NumericT)*internal_size(), v.context());
+ pad();
 }
+}
- if (size_ > 0)
- viennacl::linalg::vector_assign(*this, v[0]);
+if (size_ > 0)
+ viennacl::linalg::vector_assign(*this, v[0]);
- return *this;
+return *this;
 }
@@ -510,46 +512,46 @@ vector_base & vector_base
-template
-vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy)
+template
+vector_base & vector_base::operator=(const viennacl::vector_expression< const matrix_base, const vector_base, viennacl::op_prod> & proxy)
 {
- assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed for v1 = A * v2: size1(A) != size(v1)"));
+assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed for v1 = A * v2: size1(A) != size(v1)"));
- // check for the special case x = A * x
- if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this))
- {
- viennacl::vector result(viennacl::traits::size1(proxy.lhs()));
- viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
- *this = result;
- }
- else
- {
- viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
- }
- return *this;
+// check for the special case x = A * x
+if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this))
+{
+ viennacl::vector result(viennacl::traits::size1(proxy.lhs()));
+ viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+ *this = result;
+}
+else
+{
+ viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
+}
+return *this;
 }
 //transposed_matrix_proxy:
-template
-vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >,
- const vector_base,
- op_prod> & proxy)
+template
+vector_base & vector_base::operator=(const vector_expression< const matrix_expression< const matrix_base, const matrix_base, op_trans >,
+ const vector_base,
+ op_prod> & proxy)
 {
- assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed in v1 = trans(A) * v2: size2(A) != size(v1)"));
+assert(viennacl::traits::size1(proxy.lhs()) == size() && bool("Size check failed in v1 = trans(A) * v2: size2(A) != size(v1)"));
- // check for the special case x = trans(A) * x
- if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this))
- {
- viennacl::vector result(viennacl::traits::size1(proxy.lhs()));
- viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
- *this = result;
- }
- else
- {
- viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
- }
- return *this;
+// check for the special case x = trans(A) * x
+if (viennacl::traits::handle(proxy.rhs()) == viennacl::traits::handle(*this))
+{
+ viennacl::vector result(viennacl::traits::size1(proxy.lhs()));
+ viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
+ *this = result;
+}
+else
+{
+ viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
+}
+return *this;
 }
 ///////////////////////////// Matrix Vector interaction end ///////////////////////////////////
@@ -558,40 +560,40 @@ vector_base & vector_base
-entry_proxy vector_base::operator()(size_type index)
+template
+entry_proxy vector_base::operator()(size_type index)
 {
- assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
- assert( index < size() && bool("Index out of bounds!") );
+assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
+assert( index < size() && bool("Index out of bounds!") );
- return entry_proxy(start_ + stride_ * index, elements_);
+return entry_proxy(start_ + stride_ * index, elements_);
 }
-template
-entry_proxy vector_base::operator[](size_type index)
+template
+entry_proxy vector_base::operator[](size_type index)
 {
- assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
- assert( index < size() && bool("Index out of bounds!") );
+assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
+assert( index < size() && bool("Index out of bounds!") );
- return entry_proxy(start_ + stride_ * index, elements_);
+return entry_proxy(start_ + stride_ * index, elements_);
 }
-template
-const_entry_proxy vector_base::operator()(size_type index) const
+template
+const_entry_proxy vector_base::operator()(size_type index) const
 {
- assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
- assert( index < size() && bool("Index out of bounds!") );
+assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
+assert( index < size() && bool("Index out of bounds!") );
- return const_entry_proxy(start_ + stride_ * index, elements_);
+return const_entry_proxy(start_ + stride_ * index, elements_);
 }
-template
-const_entry_proxy vector_base::operator[](size_type index) const
+template
+const_entry_proxy vector_base::operator[](size_type index) const
 {
- assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
- assert( index < size() && bool("Index out of bounds!") );
+assert( (size() > 0) && bool("Cannot apply operator() to vector of size zero!"));
+assert( index < size() && bool("Index out of bounds!") );
- return const_entry_proxy(start_ + stride_ * index, elements_);
+return const_entry_proxy(start_ + stride_ * index, elements_);
 }
 //////////////////////////// Read-write access to an element of the vector end ///////////////////
@@ -600,236 +602,236 @@ const_entry_proxy vector_base::operator[](
 //
 // Operator overloads with implicit conversion (thus cannot be made global without introducing additional headache)
 //
-template
-vector_base & vector_base::operator += (const self_type & vec)
+template
+vector_base & vector_base::operator += (const self_type & vec)
 {
- assert(vec.size() == size() && bool("Incompatible vector sizes!"));
+assert(vec.size() == size() && bool("Incompatible vector sizes!"));
- if (size() > 0)
- viennacl::linalg::avbv(*this,
- *this, NumericT(1.0), 1, false, false,
- vec, NumericT(1.0), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::avbv(*this,
+ *this, NumericT(1.0), 1, false, false,
+ vec, NumericT(1.0), 1, false, false);
+return *this;
 }
-template
-vector_base & vector_base::operator -= (const self_type & vec)
 {
- assert(vec.size() == size() && bool("Incompatible vector sizes!"));
+assert(vec.size() == size() && bool("Incompatible vector sizes!"));
- if (size() > 0)
- viennacl::linalg::avbv(*this,
- *this, NumericT(1.0), 1, false, false,
- vec, NumericT(-1.0), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::avbv(*this,
+ *this, NumericT(1.0), 1, false, false,
+ vec, NumericT(-1.0), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by a char (8-bit integer) value */
-template
-vector_base & vector_base::operator *= (char val)
+template
+vector_base & vector_base::operator *= (char val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by a short integer value */
-template
-vector_base & vector_base::operator *= (short val)
+template
+vector_base & vector_base::operator *= (short val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by an integer value */
-template
-vector_base & vector_base::operator *= (int val)
+template
+vector_base & vector_base::operator *= (int val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by a long integer value */
-template
-vector_base & vector_base::operator *= (long val)
+template
+vector_base & vector_base::operator *= (long val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by a single precision floating point value */
-template
-vector_base & vector_base::operator *= (float val)
+template
+vector_base & vector_base::operator *= (float val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales a vector (or proxy) by a double precision floating point value */
-template
-vector_base & vector_base::operator *= (double val)
+template
+vector_base & vector_base::operator *= (double val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, false, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, false, false);
+return *this;
 }
 /** @brief Scales this vector by a char (8-bit) value */
-template
-vector_base & vector_base::operator /= (char val)
+template
+vector_base & vector_base::operator /= (char val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales this vector by a short integer value */
-template
-vector_base & vector_base::operator /= (short val)
+template
+vector_base & vector_base::operator /= (short val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales this vector by an integer value */
-template
-vector_base & vector_base::operator /= (int val)
+template
+vector_base & vector_base::operator /= (int val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales this vector by a long integer value */
-template
-vector_base & vector_base::operator /= (long val)
+template
+vector_base & vector_base::operator /= (long val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales this vector by a single precision floating point value */
-template
-vector_base & vector_base::operator /= (float val)
+template
+vector_base & vector_base::operator /= (float val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales this vector by a double precision floating point value */
-template
-vector_base & vector_base::operator /= (double val)
+template
+vector_base & vector_base::operator /= (double val)
 {
- if (size() > 0)
- viennacl::linalg::av(*this,
- *this, NumericT(val), 1, true, false);
- return *this;
+if (size() > 0)
+ viennacl::linalg::av(*this,
+ *this, NumericT(val), 1, true, false);
+return *this;
 }
 /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (char value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (char value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (short value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (short value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by an integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (int value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (int value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (long value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (long value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (float value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (float value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_mult>
-vector_base::operator * (double value) const
+template
+vector_expression< const vector_base, const NumericT, op_mult>
+vector_base::operator * (double value) const
 {
- return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_mult>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a char (8-bit value) 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (char value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (char value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a short integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (short value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (short value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Scales the vector by an integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (int value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (int value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a long integer 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (long value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (long value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a single precision floating point number 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (float value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (float value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Scales the vector by a double precision floating point number 'alpha' and returns an expression template */
-template
-vector_expression< const vector_base, const NumericT, op_div>
-vector_base::operator / (double value) const
+template
+vector_expression< const vector_base, const NumericT, op_div>
+vector_base::operator / (double value) const
 {
- return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
+return vector_expression< const self_type, const NumericT, op_div>(*this, NumericT(value));
 }
 /** @brief Sign flip for the vector. Emulated to be equivalent to -1.0 * vector */
-template
-vector_expression, const NumericT, op_mult>
-vector_base::operator-() const
+template
+vector_expression, const NumericT, op_mult>
+vector_base::operator-() const
 {
- return vector_expression(*this, NumericT(-1.0));
+return vector_expression(*this, NumericT(-1.0));
 }
 //
@@ -837,88 +839,88 @@ vector_base::operator-() const
 //
 /** @brief Returns an iterator pointing to the beginning of the vector (STL like)*/
-template
-typename vector_base::iterator vector_base::begin()
+template
+typename vector_base::iterator vector_base::begin()
 {
- return iterator(*this, 0, start_, stride_);
+return iterator(*this, 0, start_, stride_);
 }
 /** @brief Returns an iterator pointing to the end of the vector (STL like)*/
-template
-typename vector_base::iterator vector_base::end()
+template
+typename vector_base::iterator vector_base::end()
 {
- return iterator(*this, size(), start_, stride_);
+return iterator(*this, size(), start_, stride_);
 }
 /** @brief Returns a const-iterator pointing to the beginning of the vector (STL like)*/
-template
-typename vector_base::const_iterator vector_base::begin() const
+template
+typename vector_base::const_iterator vector_base::begin() const
 {
- return const_iterator(*this, 0, start_, stride_);
+return const_iterator(*this, 0, start_, stride_);
 }
-template
-typename vector_base::const_iterator vector_base::end() const
+template
+typename vector_base::const_iterator vector_base::end() const
 {
- return const_iterator(*this, size(), start_, stride_);
+return const_iterator(*this, size(), start_, stride_);
 }
-template
-vector_base & vector_base::swap(self_type & other)
+template
+vector_base & vector_base::swap(self_type & other)
 {
- viennacl::linalg::vector_swap(*this, other);
- return *this;
+viennacl::linalg::vector_swap(*this, other);
+return *this;
 }
-template
-void vector_base::clear()
+template
+void vector_base::clear()
 {
- viennacl::linalg::vector_assign(*this, cpu_value_type(0.0), true);
+viennacl::linalg::vector_assign(*this, cpu_value_type(0.0), true);
 }
-template
-vector_base & vector_base::fast_swap(self_type & other)
+template
+vector_base & vector_base::fast_swap(self_type & other)
 {
- assert(this->size_ == other.size_ && bool("Vector size mismatch"));
- this->elements_.swap(other.elements_);
- return *this;
+assert(this->size_ == other.size_ && bool("Vector size mismatch"));
+this->elements_.swap(other.elements_);
+return *this;
 }
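The compound and free scaling operators changed above keep their semantics: *= and /= act in place, while operator*, operator/ and unary minus return expression templates that are evaluated on assignment. A short hedged sketch (names and values illustrative):

```cpp
#include "viennacl/vector.hpp"

void vector_operator_examples()
{
  viennacl::vector<float> v = viennacl::scalar_vector<float>(8, 1.0f);
  viennacl::vector<float> w = viennacl::scalar_vector<float>(8, 2.0f);

  v += w;                               // element-wise update, dispatched to avbv()
  v *= 2;                               // integer overloads convert to NumericT
  v /= 4.0f;
  v(0) = 3.5f;                          // element access through entry_proxy

  viennacl::vector<float> s = v * 3.0;  // expression template, evaluated here
  viennacl::vector<float> u = -s;       // emulated as -1.0 * s
}
```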
-template
-void vector_base::pad()
+template
+void vector_base::pad()
 {
- if (internal_size() != size())
- {
- std::vector pad(internal_size() - size());
- viennacl::backend::memory_write(elements_, sizeof(NumericT) * size(), sizeof(NumericT) * pad.size(), &(pad[0]));
- }
+if (internal_size() != size())
+{
+ std::vector pad(internal_size() - size());
+ viennacl::backend::memory_write(elements_, sizeof(NumericT) * size(), sizeof(NumericT) * pad.size(), &(pad[0]));
+}
 }
-template
-void vector_base::switch_memory_context(viennacl::context new_ctx)
+template
+void vector_base::switch_memory_context(viennacl::context new_ctx)
 {
- viennacl::backend::switch_memory_context(elements_, new_ctx);
+viennacl::backend::switch_memory_context(elements_, new_ctx);
 }
 //TODO: Think about implementing the following public member functions
 //void insert_element(unsigned int i, NumericT val){}
 //void erase_element(unsigned int i){}
-template
-void vector_base::resize(size_type new_size, bool preserve)
+template
+void vector_base::resize(size_type new_size, bool preserve)
 {
- resize_impl(new_size, viennacl::traits::context(*this), preserve);
+resize_impl(new_size, viennacl::traits::context(*this), preserve);
 }
-template
-void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve)
+template
+void vector_base::resize(size_type new_size, viennacl::context ctx, bool preserve)
 {
- resize_impl(new_size, ctx, preserve);
+resize_impl(new_size, ctx, preserve);
 }
-template
-void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve)
+template
+void vector_base::resize_impl(size_type new_size, viennacl::context ctx, bool preserve)
 {
 assert(new_size > 0 && bool("Positive size required when resizing vector!"));
@@ -946,11 +948,11 @@ void vector_base::resize_impl(size_type new_size, vi
 }
-template
-class vector : public vector_base
+template
+class vector : public vector_base
 {
- typedef vector self_type;
- typedef vector_base base_type;
+ typedef vector self_type;
+ typedef vector_base base_type;
 public:
 typedef typename base_type::size_type size_type;
@@ -1069,10 +1071,10 @@ public:
 }; //vector
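resize() and switch_memory_context() from the hunks above belong to the public interface; a hedged usage sketch (the target context is illustrative):

```cpp
#include "viennacl/vector.hpp"

void resize_and_move(viennacl::vector<float> & v)
{
  v.resize(2 * v.size(), true);   // grow, preserving existing entries
  v.resize(16, false);            // shrink, discarding old contents

  // Move the underlying buffer to another memory domain, e.g. host main memory:
  v.switch_memory_context(viennacl::context(viennacl::MAIN_MEMORY));
}
```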
 /** @brief Tuple class holding pointers to multiple vectors. Mainly used as a temporary object returned from viennacl::tie().
 */
-template
+template
 class vector_tuple
 {
- typedef vector_base VectorType;
+ typedef vector_base VectorType;
 public:
 // 2 vectors
@@ -1151,65 +1153,65 @@ private:
 };
 // 2 args
-template
-vector_tuple tie(vector_base const & v0, vector_base const & v1) { return vector_tuple(v0, v1); }
+template
+vector_tuple tie(vector_base const & v0, vector_base const & v1) { return vector_tuple(v0, v1); }
-template
-vector_tuple tie(vector_base & v0, vector_base & v1) { return vector_tuple(v0, v1); }
+template
+vector_tuple tie(vector_base & v0, vector_base & v1) { return vector_tuple(v0, v1); }
 // 3 args
-template
-vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2) { return vector_tuple(v0, v1, v2); }
+template
+vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2) { return vector_tuple(v0, v1, v2); }
-template
-vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2) { return vector_tuple(v0, v1, v2); }
+template
+vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2) { return vector_tuple(v0, v1, v2); }
 // 4 args
-template
-vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2, vector_base const & v3)
+template
+vector_tuple tie(vector_base const & v0, vector_base const & v1, vector_base const & v2, vector_base const & v3)
 {
- return vector_tuple(v0, v1, v2, v3);
+ return vector_tuple(v0, v1, v2, v3);
 }
-template
-vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2, vector_base & v3)
+template
+vector_tuple tie(vector_base & v0, vector_base & v1, vector_base & v2, vector_base & v3)
 {
- return vector_tuple(v0, v1, v2, v3);
+ return vector_tuple(v0, v1, v2, v3);
 }
 // 5 args
-template
-vector_tuple tie(vector_base const & v0,
- vector_base const & v1,
- vector_base const & v2,
- vector_base const & v3,
- vector_base const & v4)
-{
- typedef vector_base const * VectorPointerType;
+template
+vector_tuple tie(vector_base const & v0,
+ vector_base const & v1,
+ vector_base const & v2,
+ vector_base const & v3,
+ vector_base const & v4)
+{
+ typedef vector_base const * VectorPointerType;
 std::vector vec(5);
 vec[0] = &v0; vec[1] = &v1; vec[2] = &v2; vec[3] = &v3; vec[4] = &v4;
- return vector_tuple(vec);
+ return vector_tuple(vec);
 }
-template
-vector_tuple tie(vector_base & v0,
- vector_base & v1,
- vector_base & v2,
- vector_base & v3,
- vector_base & v4)
+template
+vector_tuple tie(vector_base & v0,
+ vector_base & v1,
+ vector_base & v2,
+ vector_base & v3,
+ vector_base & v4)
 {
- typedef vector_base * VectorPointerType;
+ typedef vector_base * VectorPointerType;
 std::vector vec(5);
 vec[0] = &v0; vec[1] = &v1; vec[2] = &v2; vec[3] = &v3; vec[4] = &v4;
- return vector_tuple(vec);
+ return vector_tuple(vec);
 }
 // TODO: Add more arguments to tie() here. Maybe use some preprocessor magic to accomplish this.
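vector_tuple objects created via tie() are consumed, for instance, by the fused inner product whose op_executor appears further down (x = inner_prod(z, {y0, y1, ...})). A hedged sketch; the exact availability of the multi-argument inner_prod overload depends on the ViennaCL version:

```cpp
#include "viennacl/vector.hpp"
#include "viennacl/linalg/inner_prod.hpp"

void fused_inner_products(viennacl::vector<float> const & x,
                          viennacl::vector<float> const & y0,
                          viennacl::vector<float> const & y1,
                          viennacl::vector<float> const & y2)
{
  // result[i] = <x, y_i>, computed in a single fused sweep over x
  viennacl::vector<float> result(3);
  result = viennacl::linalg::inner_prod(x, viennacl::tie(y0, y1, y2));
}
```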
@@ -1230,9 +1232,9 @@ vector_tuple tie(vector_base & v0,
 * @param gpu_end GPU iterator pointing to the end of the vector (STL-like)
 * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector!
 */
-template
-void fast_copy(const const_vector_iterator & gpu_begin,
- const const_vector_iterator & gpu_end,
+template
+void fast_copy(const const_vector_iterator & gpu_begin,
+ const const_vector_iterator & gpu_end,
 CPU_ITERATOR cpu_begin )
 {
 if (gpu_begin != gpu_end)
@@ -1263,8 +1265,8 @@ void fast_copy(const const_vector_iterator & gpu_begin,
 * @param gpu_vec A gpu vector.
 * @param cpu_vec The cpu vector. Type requirements: Output iterator pointing to entries linear in memory can be obtained via member function .begin()
 */
-template
-void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec )
+template
+void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec )
 {
 viennacl::fast_copy(gpu_vec.begin(), gpu_vec.end(), cpu_vec.begin());
 }
@@ -1280,9 +1282,9 @@ void fast_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec )
 * @param gpu_end GPU iterator pointing to the end of the vector (STL-like)
 * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector!
 */
-template
-void async_copy(const const_vector_iterator & gpu_begin,
- const const_vector_iterator & gpu_end,
+template
+void async_copy(const const_vector_iterator & gpu_begin,
+ const const_vector_iterator & gpu_end,
 CPU_ITERATOR cpu_begin )
 {
 if (gpu_begin != gpu_end)
@@ -1318,9 +1320,9 @@ void async_copy(vector_base const & gpu_vec, CPUVECTOR & cpu_vec )
 * @param gpu_end GPU constant iterator pointing to the end of the vector (STL-like)
 * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector!
 */
-template
-void copy(const const_vector_iterator & gpu_begin,
- const const_vector_iterator & gpu_end,
+template
+void copy(const const_vector_iterator & gpu_begin,
+ const const_vector_iterator & gpu_end,
 CPU_ITERATOR cpu_begin )
 {
 assert(gpu_end - gpu_begin >= 0 && bool("Iterators incompatible"));
@@ -1340,14 +1342,14 @@ void copy(const const_vector_iterator & gpu_begin,
 * @param gpu_end GPU iterator pointing to the end of the vector (STL-like)
 * @param cpu_begin Output iterator for the cpu vector. The cpu vector must be at least as long as the gpu vector!
 */
-template
-void copy(const vector_iterator & gpu_begin,
- const vector_iterator & gpu_end,
+template
+void copy(const vector_iterator & gpu_begin,
+ const vector_iterator & gpu_end,
 CPU_ITERATOR cpu_begin )
 {
- viennacl::copy(const_vector_iterator(gpu_begin),
- const_vector_iterator(gpu_end),
+ viennacl::copy(const_vector_iterator(gpu_begin),
+ const_vector_iterator(gpu_end),
 cpu_begin);
 }
@@ -1396,10 +1398,10 @@ void copy(vector const & gpu_vec,
 * @param cpu_end CPU iterator pointing to the end of the vector (STL-like)
 * @param gpu_begin Output iterator for the gpu vector. The gpu iterator must be incrementable (cpu_end - cpu_begin) times, otherwise the result is undefined.
 */
-template
+template
 void fast_copy(CPU_ITERATOR const & cpu_begin,
 CPU_ITERATOR const & cpu_end,
- vector_iterator gpu_begin)
+ vector_iterator gpu_begin)
 {
 if (cpu_end - cpu_begin > 0)
 {
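The host/device transfer helpers touched in these hunks are used as in the following sketch: copy() works with generic containers and iterators, while fast_copy() additionally assumes contiguous host storage:

```cpp
#include <vector>
#include "viennacl/vector.hpp"

void transfer_examples()
{
  std::vector<float> host(100, 1.0f);
  viennacl::vector<float> device(100);

  viennacl::copy(host, device);        // host -> device, generic element-wise transfer
  viennacl::fast_copy(host, device);   // host -> device, single contiguous block
  viennacl::fast_copy(device, host);   // device -> host
}
```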
@@ -1430,8 +1432,8 @@ void fast_copy(CPU_ITERATOR const & cpu_begin,
 * @param cpu_vec A cpu vector. Type requirements: Iterator can be obtained via member function .begin() and .end()
 * @param gpu_vec The gpu vector.
 */
-template
-void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec)
+template
+void fast_copy(const CPUVECTOR & cpu_vec, vector_base & gpu_vec)
 {
 viennacl::fast_copy(cpu_vec.begin(), cpu_vec.end(), gpu_vec.begin());
 }
@@ -2056,10 +2058,10 @@ namespace detail
 };
 // x = inner_prod(z, {y0, y1, ...})
- template
- struct op_executor, op_assign, vector_expression, const vector_tuple, op_inner_prod> >
+ template
+ struct op_executor, op_assign, vector_expression, const vector_tuple, op_inner_prod> >
 {
- static void apply(vector_base & lhs, vector_expression, const vector_tuple, op_inner_prod> const & rhs)
+ static void apply(vector_base & lhs, vector_expression, const vector_tuple, op_inner_prod> const & rhs)
 {
 viennacl::linalg::inner_prod_impl(rhs.lhs(), rhs.rhs(), lhs);
 }