diff --git a/doc/source/array.rst b/doc/source/array.rst
index c24304a7ba56001608f556ecfa7cbe8d6a6845b7..0e435a7c69b8954c53356d1d0d4dd54a4522611e 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -5,9 +5,7 @@ The :class:`Array` Class
 
 .. class:: DefaultAllocator(context, flags=pyopencl.mem_flags.READ_WRITE)
 
-    An Allocator that uses :class:`pyopencl.Buffer` with the given *flags*.
-
-    .. method:: __call__(self, size)
+    An alias for :class:`pyopencl.tools.CLAllocator`.
 
 .. class:: Array(cqa, shape, dtype, order="C", allocator=None, base=None, data=None, queue=None)
diff --git a/doc/source/index.rst b/doc/source/index.rst
index c3bca32474ce1b000205fe330b7da38bc0f622d7..457b4cdb44b37c7147b02c3b33e7e670ee43c872 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -71,6 +71,7 @@ Contents
 
     runtime
     array
+    tools
     misc
 
 Note that this guide does not explain OpenCL programming and technology. Please
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index b2ff45a80facb4227d5272e724157f4cbe47c412..f94b716252cb0acb5af923806b690547e634b8a8 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -86,12 +86,13 @@ Version 2011.1
   :func:`pyopencl.enqueue_map_image`.
 * Add :mod:`pyopencl.reduction`.
 * Add :ref:`reductions`.
-* Add :meth:`MemoryObject.get_host_array`.
+* Add :meth:`pyopencl.MemoryObject.get_host_array`.
 * Deprecate context arguments of :func:`pyopencl.array.to_device`,
   :func:`pyopencl.array.zeros`, :func:`pyopencl.array.arange`.
 * Make construction of :class:`pyopencl.array.Array` more flexible (*cqa*
   argument.)
+* Add :ref:`memory-pools`.
 
 Version 0.92
 ------------
diff --git a/doc/source/tools.rst b/doc/source/tools.rst
new file mode 100644
index 0000000000000000000000000000000000000000..feb65a7d93f99b4f0b4691d7e6ab67d27afb0517
--- /dev/null
+++ b/doc/source/tools.rst
@@ -0,0 +1,69 @@
+Built-in Utilities
+==================
+
+.. module:: pyopencl.tools
+
+.. _memory-pools:
+
+Memory Pools
+------------
+
+The constructor :func:`pyopencl.Buffer` can consume a fairly large amount of
+processing time if it is invoked very frequently. For example, code based on
+:class:`pyopencl.array.Array` can easily run into this issue because a
+fresh memory area is allocated for each intermediate result. Memory pools are a
+remedy for this problem based on the observation that often many of the block
+allocations are of the same sizes as previously used ones.
+
+Then, instead of fully returning the memory to the system and incurring the
+associated reallocation overhead, the pool holds on to the memory and uses it
+to satisfy future allocations of similarly-sized blocks. The pool reacts
+appropriately to out-of-memory conditions as long as all memory allocations
+are made through it. Allocations performed from outside of the pool may run
+into spurious out-of-memory conditions due to the pool owning much or all of
+the available memory.
+
+.. class:: PooledBuffer
+
+    An object representing a :class:`MemoryPool`-based allocation of
+    device memory. Once this object is deleted, its associated device
+    memory is returned to the pool. This supports the same interface
+    as :class:`pyopencl.Buffer`.
+
+.. class:: CLAllocator(context, mem_flags=pyopencl.mem_flags.READ_WRITE)
+
+    *mem_flags* takes its values from :class:`pyopencl.mem_flags` and corresponds
+    to the *flags* argument of :class:`pyopencl.Buffer`.
+
+    .. method:: __call__(size)
+
+        Allocate a :class:`pyopencl.Buffer` of the given *size*.
+
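+    A minimal usage sketch (``ctx`` stands in for an already-created
+    :class:`pyopencl.Context`; the size is arbitrary)::
+
+        from pyopencl.tools import CLAllocator
+
+        allocator = CLAllocator(ctx)   # mem_flags defaults to READ_WRITE
+        buf = allocator(4096)          # a plain pyopencl.Buffer of 4096 bytes
+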
+.. class:: MemoryPool(allocator)
+
+    A memory pool for OpenCL device memory. *allocator* is an instance
+    of :class:`CLAllocator`.
+
+    .. attribute:: held_blocks
+
+        The number of unused blocks being held by this pool.
+
+    .. attribute:: active_blocks
+
+        The number of blocks in active use that have been allocated
+        through this pool.
+
+    .. method:: allocate(size)
+
+        Return a :class:`PooledBuffer` of the given *size*.
+
+    .. method:: free_held
+
+        Free all unused memory that the pool is currently holding.
+
+    .. method:: stop_holding
+
+        Instruct the memory pool to start immediately freeing memory returned
+        to it, instead of holding it for future allocations.
+        Implicitly calls :meth:`free_held`.
+        This is useful as a cleanup action when a memory pool falls out
+        of use.
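+
+A minimal usage sketch of the pool interface above (as before, ``ctx`` is an
+already-created :class:`pyopencl.Context`; the sizes are illustrative only)::
+
+    from pyopencl.tools import CLAllocator, MemoryPool
+
+    pool = MemoryPool(CLAllocator(ctx))
+
+    buf = pool.allocate(4000)    # a PooledBuffer; pool.active_blocks is now 1
+    del buf                      # block is kept by the pool: held_blocks is now 1
+
+    buf2 = pool.allocate(4000)   # same size bin, so the held block is reused
+
+    pool.stop_holding()          # release held device memory once done
+
+Since allocators are simply called with a byte count, the bound method
+:meth:`MemoryPool.allocate` should also be usable as the *allocator* argument
+of :class:`pyopencl.array.Array`, though that combination is not exercised by
+the tests in this change.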
diff --git a/pyopencl/array.py b/pyopencl/array.py
index f3ae7c3f3a3ab38a90a34f9ba4211b3f4bd5f6c2..d068dfd5f457c7f6aa765016bef0162f600a015b 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -112,13 +112,7 @@ def elwise_kernel_runner(kernel_getter):
 
 
 
-class DefaultAllocator:
-    def __init__(self, context, flags=cl.mem_flags.READ_WRITE):
-        self.context = context
-        self.flags = flags
-
-    def __call__(self, size):
-        return cl.Buffer(self.context, self.flags, size)
+DefaultAllocator = cl.CLAllocator
 
 
 
diff --git a/pyopencl/tools.py b/pyopencl/tools.py
index 17310a64da97b23723095de9c661ece5660b08c6..d3ac00ee162a2df06fdf9e21026ca716dd881767 100644
--- a/pyopencl/tools.py
+++ b/pyopencl/tools.py
@@ -1,4 +1,4 @@
-"""H."""
+"""Various helpful bits and pieces without much of a common theme."""
 
 from __future__ import division
 
@@ -36,6 +36,13 @@ import pyopencl as cl
 
 
 
+PooledBuffer = cl.PooledBuffer
+CLAllocator = cl.CLAllocator
+MemoryPool = cl.MemoryPool
+
+
+
+
 @decorator
 def context_dependent_memoize(func, context, *args):
     """Provides memoization for things that get created inside
diff --git a/src/wrapper/wrap_mempool.cpp b/src/wrapper/wrap_mempool.cpp
index 6cfa84b87b0b6e15591dce37a9251466bf7243da..31da32860174a210dc89ed5729b66d1b6e115dbf 100644
--- a/src/wrapper/wrap_mempool.cpp
+++ b/src/wrapper/wrap_mempool.cpp
@@ -22,7 +22,7 @@ namespace
 
     public:
       cl_allocator(boost::shared_ptr<pyopencl::context> const &ctx,
-          cl_mem_flags flags)
+          cl_mem_flags flags=CL_MEM_READ_WRITE)
        : m_context(ctx), m_flags(flags)
      {
        if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
@@ -52,6 +52,25 @@
 
 
 
+  inline
+  pyopencl::buffer *allocator_call(cl_allocator &alloc, size_t size)
+  {
+    cl_mem mem = alloc.allocate(size);
+
+    try
+    {
+      return new pyopencl::buffer(mem, false);
+    }
+    catch (...)
+    {
+      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
+      throw;
+    }
+  }
+
+
+
+
   class pooled_buffer
     : public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator> >,
       public pyopencl::memory_object_holder
@@ -113,7 +132,12 @@ void pyopencl_expose_mempool()
     py::class_<cls> wrapper("CLAllocator",
         py::init<
           boost::shared_ptr<pyopencl::context> const &,
-          cl_mem_flags>());
+          py::optional<cl_mem_flags> >());
+    wrapper
+      .def("__call__", allocator_call,
+          py::return_value_policy<py::manage_new_object>())
+      ;
+
   }
 
   {
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index ddd7c0e149b54d74887e5a9b86ed18d38d271356..d312260a52049f5c2ca3e1f111d14807878a6ef9 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -290,6 +290,42 @@ class TestCL:
 
         assert la.norm(a - b) == 0
 
+    @pytools.test.mark_test.opencl
+    def test_mempool(self, ctx_getter):
+        from pyopencl.tools import MemoryPool, CLAllocator
+
+        context = ctx_getter()
+
+        pool = MemoryPool(CLAllocator(context))
+        maxlen = 10
+        queue = []
+
+        e0 = 12
+
+        for e in range(e0-6, e0-4):
+            for i in range(100):
+                queue.append(pool.allocate(1<<e))
+                if len(queue) > maxlen:
+                    queue.pop(0)
+        del queue
+        pool.stop_holding()
+
+    @pytools.test.mark_test.opencl
+    def test_mempool_2(self):
+        from pyopencl.tools import MemoryPool
+        from random import randrange
+
+        for i in range(2000):
+            s = randrange(1<<31) >> randrange(32)
+            bin_nr = MemoryPool.bin_number(s)
+            asize = MemoryPool.alloc_size(bin_nr)
+
+            assert asize >= s, s
+            assert MemoryPool.bin_number(asize) == bin_nr, s
+            assert asize < asize*(1+1/8)
+
+
+