// Warp memory pool
//
// Copyright (C) 2009 Andreas Kloeckner
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Gregor Thalhammer (on Apr 13, 2011) said it's necessary to import Python.h
// first to prevent OS X from overriding a bunch of macros. (e.g. isspace)
#include <Python.h>
#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL pyopencl_ARRAY_API
#include <vector>
#include "wrap_helpers.hpp"
#include "mempool.hpp"
#include "tools.hpp"
namespace
{
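  // Base class for the OpenCL buffer allocators below. Subclasses
  // decide whether an out-of-memory condition is reported directly
  // from allocate() ("immediate") or only once the buffer is first
  // used ("deferred").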
class cl_allocator_base
{
protected:
      std::shared_ptr<pyopencl::context> m_context;
      cl_mem_flags m_flags;

    public:
      cl_allocator_base(std::shared_ptr<pyopencl::context> const &ctx,
cl_mem_flags flags=CL_MEM_READ_WRITE)
: m_context(ctx), m_flags(flags)
{
if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))
throw pyopencl::error("Allocator", CL_INVALID_VALUE,
"cannot specify USE_HOST_PTR or COPY_HOST_PTR flags");
}
cl_allocator_base(cl_allocator_base const &src)
: m_context(src.m_context), m_flags(src.m_flags)
{ }
virtual ~cl_allocator_base()
{ }
typedef cl_mem pointer_type;
typedef size_t size_type;
virtual cl_allocator_base *copy() const = 0;
virtual bool is_deferred() const = 0;
virtual pointer_type allocate(size_type s) = 0;
void free(pointer_type p)
{
PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p));
}
void try_release_blocks()
{
pyopencl::run_python_gc();
}
};
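  // Deferred: create_buffer() merely creates the cl_mem. Many OpenCL
  // implementations postpone the actual device-side allocation until
  // the buffer is first used, so allocation failures may not surface
  // from allocate() here.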
class cl_deferred_allocator : public cl_allocator_base
{
private:
typedef cl_allocator_base super;
public:
cl_deferred_allocator(std::shared_ptr<pyopencl::context> const &ctx,
cl_mem_flags flags=CL_MEM_READ_WRITE)
: super(ctx, flags)
{ }
cl_allocator_base *copy() const
{
return new cl_deferred_allocator(*this);
}
bool is_deferred() const
{ return true; }
pointer_type allocate(size_type s)
{
if (s == 0)
return nullptr;
return pyopencl::create_buffer(m_context->data(), m_flags, s, 0);
}
};
const unsigned zero = 0;
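  // Immediate: after creating the buffer, enqueue a small write so the
  // implementation must materialize the allocation right away and
  // report 'out of memory' from allocate() itself (see the comment in
  // allocate() below).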
class cl_immediate_allocator : public cl_allocator_base
{
private:
typedef cl_allocator_base super;
pyopencl::command_queue m_queue;
public:
cl_immediate_allocator(pyopencl::command_queue &queue,
cl_mem_flags flags=CL_MEM_READ_WRITE)
: super(std::shared_ptr<pyopencl::context>(queue.get_context()), flags),
m_queue(queue.data(), /*retain*/ true)
{ }
cl_immediate_allocator(cl_immediate_allocator const &src)
: super(src), m_queue(src.m_queue)
{ }
cl_allocator_base *copy() const
{
return new cl_immediate_allocator(*this);
}
bool is_deferred() const
{ return false; }
pointer_type allocate(size_type s)
{
if (s == 0)
return nullptr;
pointer_type ptr = pyopencl::create_buffer(
m_context->data(), m_flags, s, 0);
// Make sure the buffer gets allocated right here and right now.
// This looks (and is) expensive. But immediate allocators
// have their main use in memory pools, whose basic assumption
// is that allocation is too expensive anyway--but they rely
// on 'out-of-memory' being reported on allocation. (If it is
// reported in a deferred manner, it has no way to react
// (e.g. by freeing unused memory) because it is not part of
        // the call stack.)
unsigned zero = 0;
PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, (
m_queue.data(),
ptr,
/* is blocking */ CL_FALSE,
0, std::min(s, sizeof(zero)), &zero,
0, NULL, NULL
));
// No need to wait for completion here. clWaitForEvents (e.g.)
// cannot return mem object allocation failures. This implies that
// the buffer is faulted onto the device on enqueue.
return ptr;
}
};
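  // A minimal sketch of how these classes are used from the Python
  // side, assuming the pyopencl.tools names that re-export the
  // bindings registered in pyopencl_expose_mempool below:
  //
  //   import pyopencl as cl
  //   import pyopencl.tools
  //
  //   ctx = cl.create_some_context()
  //   queue = cl.CommandQueue(ctx)
  //   allocator = cl.tools.ImmediateAllocator(queue)
  //   pool = cl.tools.MemoryPool(allocator)
  //   buf = pool.allocate(4096)   # a PooledBuffer
  //   buf.release()               # block goes back into the pool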
inline
pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size)
{
cl_mem mem;
int try_count = 0;
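    // Attempt the allocation at most twice: if the first try reports
    // an out-of-memory error, run the Python garbage collector (which
    // may release buffers whose last reference is gone) and retry once.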
while (try_count < 2)
{
try
{
mem = alloc.allocate(size);
break;
}
catch (pyopencl::error &e)
{
if (!e.is_out_of_memory())
throw;
if (++try_count == 2)
throw;
}
alloc.try_release_blocks();
}
if (!mem)
{
if (size == 0)
return nullptr;
else
throw pyopencl::error("Allocator", CL_INVALID_VALUE,
"allocator succeeded but returned NULL cl_mem");
}
try
{
return new pyopencl::buffer(mem, false);
}
catch (...)
{
PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
throw;
}
}
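  // A buffer handed out by a memory pool. Assuming the
  // pooled_allocation semantics from mempool.hpp, the underlying block
  // is returned to the pool on free()/destruction rather than being
  // released to the OpenCL implementation.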
class pooled_buffer
: public pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >,
public pyopencl::memory_object_holder
{
private:
typedef
pyopencl::pooled_allocation<pyopencl::memory_pool<cl_allocator_base> >
super;
public:
pooled_buffer(
std::shared_ptr<super::pool_type> p, super::size_type s)
: super(p, s)
{ }
const super::pointer_type data() const
{ return ptr(); }
};
pooled_buffer *device_pool_allocate(
std::shared_ptr<pyopencl::memory_pool<cl_allocator_base> > pool,
pyopencl::memory_pool<cl_allocator_base>::size_type sz)
{
return new pooled_buffer(pool, sz);
}
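  // Bindings shared by all memory-pool-like classes: statistics
  // (held/active block counts), bin bookkeeping, and manual control
  // over held blocks.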
template<class Wrapper>
void expose_memory_pool(Wrapper &wrapper)
{
    typedef typename Wrapper::type cls;
    wrapper
      .def_property_readonly("held_blocks", &cls::held_blocks)
.def_property_readonly("active_blocks", &cls::active_blocks)
.DEF_SIMPLE_METHOD(bin_number)
.DEF_SIMPLE_METHOD(alloc_size)
.DEF_SIMPLE_METHOD(free_held)
.DEF_SIMPLE_METHOD(stop_holding)
;
}
}
void pyopencl_expose_mempool(py::module &m)
{
  m.def("bitlog2", pyopencl::bitlog2);

  {
typedef cl_allocator_base cls;
    py::class_<cls /*, boost::noncopyable */> wrapper(
        m, "_tools_AllocatorBase"/*, py::no_init */);
    wrapper
      .def("__call__", allocator_call)
      ;
}
{
typedef cl_deferred_allocator cls;
py::class_<cls, cl_allocator_base> wrapper(
m, "_tools_DeferredAllocator");
wrapper
.def(py::init<
std::shared_ptr<pyopencl::context> const &>())
.def(py::init<
std::shared_ptr<pyopencl::context> const &,
cl_mem_flags>(),
py::arg("queue"), py::arg("mem_flags"))
}
{
typedef cl_immediate_allocator cls;
py::class_<cls, cl_allocator_base> wrapper(
m, "_tools_ImmediateAllocator");
wrapper
.def(py::init<pyopencl::command_queue &>())
.def(py::init<pyopencl::command_queue &, cl_mem_flags>(),
py::arg("queue"), py::arg("mem_flags"))
}
{
typedef pyopencl::memory_pool<cl_allocator_base> cls;
py::class_<
cls, /* boost::noncopyable, */
        std::shared_ptr<cls>> wrapper(m, "MemoryPool");
    wrapper
.def(py::init<cl_allocator_base const &, unsigned>(),
py::arg("allocator"),
py::arg("leading_bits_in_bin_id")=4
)
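      // leading_bits_in_bin_id sets the bin granularity: allocation
      // sizes that share their leading significant bits map to the
      // same bin (see mempool.hpp for the exact scheme).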
.def("allocate", device_pool_allocate)
.def("__call__", device_pool_allocate)
// undoc for now
.DEF_SIMPLE_METHOD(set_trace)
;
expose_memory_pool(wrapper);
}
{
typedef pooled_buffer cls;
    py::class_<cls, /* boost::noncopyable, */
        pyopencl::memory_object_holder> wrapper(
          m, "PooledBuffer"/* , py::no_init */);
    wrapper
.def("release", &cls::free)
;
}
}