diff --git a/src/c_wrapper/buffer.cpp b/src/c_wrapper/buffer.cpp index f72a787d675298c79fd158b09375d68fc78ab10f..a4d999dded5334cb21d8741a42a2be8df9ca558d 100644 --- a/src/c_wrapper/buffer.cpp +++ b/src/c_wrapper/buffer.cpp @@ -75,7 +75,7 @@ create_buffer(clobj_t *buffer, clobj_t _ctx, cl_mem_flags flags, } error* -enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, +enqueue_read_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, int is_blocking, void *pyobj) @@ -84,19 +84,17 @@ enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, auto queue = static_cast(_queue); auto mem = static_cast(_mem); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueReadBuffer, queue, mem, cast_bool(is_blocking), device_offset, size, - buffer, wait_for, &evt); + buffer, wait_for, nanny_event_out(evt, pyobj)); }); - *_evt = new_nanny_event(evt, pyobj); }); } error* -enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, +enqueue_write_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, const void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, int is_blocking, void *pyobj) @@ -105,19 +103,17 @@ enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, auto queue = static_cast(_queue); auto mem = static_cast(_mem); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueWriteBuffer, queue, mem, cast_bool(is_blocking), device_offset, size, buffer, - wait_for, &evt); + wait_for, nanny_event_out(evt, pyobj)); }); - *_evt = new_nanny_event(evt, pyobj); }); } error* -enqueue_copy_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst, +enqueue_copy_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, const clobj_t *_wait_for, uint32_t num_wait_for) { @@ -138,18 +134,16 @@ enqueue_copy_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst, } const auto wait_for = buf_from_class(_wait_for, num_wait_for); - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueCopyBuffer, queue, src, dst, src_offset, - dst_offset, byte_count, wait_for, &evt); + dst_offset, byte_count, wait_for, event_out(evt)); }); - *_evt = new_event(evt); }); } error* -enqueue_fill_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, void *pattern, +enqueue_fill_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *pattern, size_t psize, size_t offset, size_t size, const clobj_t *_wait_for, uint32_t num_wait_for) { @@ -157,12 +151,10 @@ enqueue_fill_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, void *pattern, auto queue = static_cast(_queue); auto mem = static_cast(_mem); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueFillBuffer, queue, mem, pattern, psize, - offset, size, wait_for, &evt); + offset, size, wait_for, event_out(evt)); }); - *_evt = new_event(evt); }); } diff --git a/src/c_wrapper/clhelper.h b/src/c_wrapper/clhelper.h index 003e23b8c8730aeeed78b38b7fc4d3364810dc98..593e88bbc5d40f299a1b79fa3f393167a5f6c1d7 100644 --- a/src/c_wrapper/clhelper.h +++ b/src/c_wrapper/clhelper.h @@ -141,6 +141,67 @@ convert_obj(cl_int (*clRelease)(CLType), const char *name, CLType cl_obj, #define pyopencl_convert_obj(type, func, args...) \ pyopencl::convert_obj(func, #func, args) +template +class _CLObjOutArg : public OutArg { + typedef typename CLObj::cl_type CLType; + clobj_t *const m_ret; + CLType m_clobj; + cl_int (*m_release)(CLType); + const char *m_name; + std::tuple m_t1; + template + PYOPENCL_INLINE CLObj* + __new_obj(seq) + { + return new CLObj(m_clobj, false, std::get(m_t1)...); + } +public: + PYOPENCL_INLINE + _CLObjOutArg(clobj_t *ret, cl_int (*release)(CLType), + const char *name, T... t1) + : m_ret(ret), m_clobj(nullptr), m_release(release), + m_name(name), m_t1(t1...) + { + } + PYOPENCL_INLINE + _CLObjOutArg(_CLObjOutArg &&other) + : m_ret(other.m_ret), m_clobj(other.m_clobj), + m_release(other.m_release), m_name(other.m_name) + { + std::swap(m_t1, other.m_t1); + } + PYOPENCL_INLINE typename CLObj::cl_type* + get() + { + return &m_clobj; + } + PYOPENCL_INLINE void + finish() + { + *m_ret = __new_obj(typename gens::type()); + } + PYOPENCL_INLINE void + cleanup(bool finished) + { + if (finished) { + delete *m_ret; + *m_ret = nullptr; + } else { + call_guarded_cleanup(m_release, m_name, m_clobj); + } + } +}; + +template +static PYOPENCL_INLINE _CLObjOutArg +make_cloutarg(clobj_t *ret, cl_int (*release)(typename CLObj::cl_type), + const char *name, T... t1) +{ + return _CLObjOutArg(ret, release, name, t1...); +} +#define pyopencl_outarg(type, ret, func, args...) \ + pyopencl::make_cloutarg(ret, func, #func, ##args) + // {{{ extension function pointers #if PYOPENCL_CL_VERSION >= 0x1020 diff --git a/src/c_wrapper/command_queue.cpp b/src/c_wrapper/command_queue.cpp index 1f0eb1e619812fdc301cd6478620a643b582004e..583374a461a0f471936ec27420ed63247ce4d40c 100644 --- a/src/c_wrapper/command_queue.cpp +++ b/src/c_wrapper/command_queue.cpp @@ -84,42 +84,36 @@ command_queue__flush(clobj_t queue) #if PYOPENCL_CL_VERSION >= 0x1020 error* -enqueue_marker_with_wait_list(clobj_t *_evt, clobj_t _queue, +enqueue_marker_with_wait_list(clobj_t *evt, clobj_t _queue, const clobj_t *_wait_for, uint32_t num_wait_for) { auto queue = static_cast(_queue); const auto wait_for = buf_from_class(_wait_for, num_wait_for); return c_handle_error([&] { - cl_event evt; pyopencl_call_guarded(clEnqueueMarkerWithWaitList, queue, - wait_for, &evt); - *_evt = new_event(evt); + wait_for, event_out(evt)); }); } error* -enqueue_barrier_with_wait_list(clobj_t *_evt, clobj_t _queue, +enqueue_barrier_with_wait_list(clobj_t *evt, clobj_t _queue, const clobj_t *_wait_for, uint32_t num_wait_for) { auto queue = static_cast(_queue); const auto wait_for = buf_from_class(_wait_for, num_wait_for); return c_handle_error([&] { - cl_event evt; pyopencl_call_guarded(clEnqueueBarrierWithWaitList, queue, - wait_for, &evt); - *_evt = new_event(evt); + wait_for, event_out(evt)); }); } #endif error* -enqueue_marker(clobj_t *_evt, clobj_t _queue) +enqueue_marker(clobj_t *evt, clobj_t _queue) { auto queue = static_cast(_queue); return c_handle_error([&] { - cl_event evt; - pyopencl_call_guarded(clEnqueueMarker, queue, &evt); - *_evt = new_event(evt); + pyopencl_call_guarded(clEnqueueMarker, queue, event_out(evt)); }); } diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h index 6d59bf446ace89395d36bcd52874e68707578302..0bc52ed89741832f485c0bbb0c6521a0c9ac829b 100644 --- a/src/c_wrapper/error.h +++ b/src/c_wrapper/error.h @@ -97,13 +97,93 @@ struct __CLArgGetter { } }; +template +struct __CLFinish { + static PYOPENCL_INLINE void + call(T) + { + } +}; + +template +struct __CLFinish().finish()))> { + static PYOPENCL_INLINE void + call(T v) + { + v.finish(); + } +}; + +template +struct __CLCleanup { + static PYOPENCL_INLINE void + call(T) + { + } +}; + +template +struct __CLCleanup().cleanup()))> { + static PYOPENCL_INLINE void + call(T v) + { + v.cleanup(); + } +}; + +template class Caller, size_t n, typename T> +struct __CLCall { + static PYOPENCL_INLINE void + call(T &&t) + { + __CLCall::call(std::forward(t)); + Caller(t))>::call(std::get(t)); + } +}; + +template class Caller, typename T> +struct __CLCall { + static PYOPENCL_INLINE void + call(T &&t) + { + Caller(t))>::call(std::get<0>(t)); + } +}; + +template +class CLArgPack : public ArgPack { +public: + using ArgPack::ArgPack; + template + PYOPENCL_INLINE auto + clcall(Func func) + -> decltype(this->template call<__CLArgGetter>(func)) + { + auto res = this->template call<__CLArgGetter>(func); + typename CLArgPack::tuple_base *that = this; + __CLCall<__CLFinish, sizeof...(Types) - 1, + decltype(*that)>::call(*that); + __CLCall<__CLCleanup, sizeof...(Types) - 1, + decltype(*that)>::call(*that); + return res; + } +}; + +template +static PYOPENCL_INLINE CLArgPack::type...> +make_clargpack(Types&&... args) +{ + return CLArgPack::type...>( + std::forward(args)...); +} + template static PYOPENCL_INLINE void call_guarded(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { print_call_trace(name); - auto argpack = make_argpack(std::forward(args)...); - cl_int status_code = argpack.template call<__CLArgGetter>(func); + auto argpack = make_clargpack(std::forward(args)...); + cl_int status_code = argpack.clcall(func); if (status_code != CL_SUCCESS) { throw clerror(name, status_code); } @@ -115,9 +195,9 @@ call_guarded(T (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { print_call_trace(name); cl_int status_code = CL_SUCCESS; - auto argpack = make_argpack(std::forward(args)..., - &status_code); - T res = argpack.template call<__CLArgGetter>(func); + auto argpack = make_clargpack(std::forward(args)..., + &status_code); + T res = argpack.clcall(func); if (status_code != CL_SUCCESS) { throw clerror(name, status_code); } @@ -132,8 +212,8 @@ call_guarded_cleanup(cl_int (*func)(ArgTypes...), const char *name, ArgTypes2&&... args) { print_call_trace(name); - auto argpack = make_argpack(std::forward(args)...); - cl_int status_code = argpack.template call<__CLArgGetter>(func); + auto argpack = make_clargpack(std::forward(args)...); + cl_int status_code = argpack.clcall(func); if (status_code != CL_SUCCESS) { std::cerr << ("PyOpenCL WARNING: a clean-up operation failed " diff --git a/src/c_wrapper/event.h b/src/c_wrapper/event.h index 3bcd09025422e01f285c7f2f30babbc479ca96d0..964efe0de6e4c8338a3f0999c0976a76a28e8a34 100644 --- a/src/c_wrapper/event.h +++ b/src/c_wrapper/event.h @@ -32,10 +32,11 @@ public: void set_callback(cl_int type, const std::function &func); #endif }; -PYOPENCL_USE_RESULT static PYOPENCL_INLINE event* -new_event(cl_event evt) +static PYOPENCL_INLINE auto +event_out(clobj_t *ret) + -> decltype(pyopencl_outarg(event, ret, clReleaseEvent)) { - return pyopencl_convert_obj(event, clReleaseEvent, evt); + return pyopencl_outarg(event, ret, clReleaseEvent); } class nanny_event : public event { @@ -57,10 +58,11 @@ public: } void finished(); }; -PYOPENCL_USE_RESULT static PYOPENCL_INLINE event* -new_nanny_event(cl_event evt, void *ward) +static PYOPENCL_INLINE auto +nanny_event_out(clobj_t *ret, void *ward) + -> decltype(pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward)) { - return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, ward); + return pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward); } // }}} diff --git a/src/c_wrapper/function.h b/src/c_wrapper/function.h index bd75a0ca10ea140cf101fc241a295ab727c3662c..5d07ca695f40f18edeb49032ad8ca934b0ef3cdb 100644 --- a/src/c_wrapper/function.h +++ b/src/c_wrapper/function.h @@ -26,7 +26,7 @@ struct gens<0, S...> { }; template -static inline auto +static PYOPENCL_INLINE auto _call_func(Function func, seq, std::tuple &args) -> decltype(func(std::forward(std::get(args))...)) { @@ -34,7 +34,7 @@ _call_func(Function func, seq, std::tuple &args) } template -static inline auto +static PYOPENCL_INLINE auto call_tuple(Function &&func, T &&args) -> decltype(_call_func(std::forward(func), typename gens::value>::type(), @@ -52,51 +52,60 @@ using _ArgPackBase = std::tuple >...>; template class Convert, typename... Types> class ArgPack : public _ArgPackBase { - typedef _ArgPackBase _base; +public: + typedef _ArgPackBase tuple_base; +private: template - static inline std::tuple + static PYOPENCL_INLINE std::tuple ensure_tuple(T &&v) { return std::tuple(std::forward(v)); } template - static inline std::tuple + static PYOPENCL_INLINE std::tuple ensure_tuple(std::tuple &&t) { return t; } + static PYOPENCL_INLINE std::tuple<> + ensure_tuple() + { + return std::tuple<>(); + } template using ArgConvert = Convert<_ArgType >; template class Getter, int... S> - inline auto + PYOPENCL_INLINE auto __get(seq) - -> decltype(std::tuple_cat(ensure_tuple(Getter >::get( - std::get(*(_base*)this)))...)) + -> decltype(std::tuple_cat( + ensure_tuple(Getter >::get( + std::get(*(tuple_base*)this)))...)) { - return std::tuple_cat(ensure_tuple(Getter >::get( - std::get(*(_base*)this)))...); + return std::tuple_cat( + ensure_tuple(Getter >::get( + std::get(*(tuple_base*)this)))...); } public: template ArgPack(Types2&&... arg_orig) - : _base(ArgConvert(arg_orig)...) + : tuple_base(ArgConvert(arg_orig)...) { } ArgPack(ArgPack &&other) - : _base(static_cast<_base&&>(other)) + : tuple_base(static_cast(other)) { } // GCC Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57543 template class Getter> - inline auto + PYOPENCL_INLINE auto get() -> decltype(this->__get( typename gens::type())) { return __get(typename gens::type()); } template class Getter, typename Func> - inline auto + PYOPENCL_INLINE auto call(Func func) -> decltype(call_tuple(func, this->get())) { @@ -105,7 +114,7 @@ public: }; template class Convert, typename... Types> -static inline ArgPack...> +static PYOPENCL_INLINE ArgPack...> make_argpack(Types&&... args) { return ArgPack...>( diff --git a/src/c_wrapper/gl_obj.cpp b/src/c_wrapper/gl_obj.cpp index 682c630d1c45296cacdded03c5f334c0543586ab..690a7ee237287dff6f308cc0426a949cef28427b 100644 --- a/src/c_wrapper/gl_obj.cpp +++ b/src/c_wrapper/gl_obj.cpp @@ -57,18 +57,16 @@ typedef cl_int (*clEnqueueGLObjectFunc)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*); -PYOPENCL_USE_RESULT static PYOPENCL_INLINE event* +static PYOPENCL_INLINE void enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, - command_queue *cq, const clobj_t *mem_objects, + clobj_t *evt, command_queue *cq, const clobj_t *mem_objects, uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for) { const auto _wait_for = buf_from_class(wait_for, num_wait_for); const auto _mem_objs = buf_from_class( mem_objects, num_mem_objects); - cl_event evt; - call_guarded(func, name, cq, _mem_objs, _wait_for, &evt); - return new_event(evt); + call_guarded(func, name, cq, _mem_objs, _wait_for, event_out(evt)); } #define enqueue_gl_objects(what, args...) \ enqueue_gl_objects(clEnqueue##what##GLObjects, \ @@ -175,8 +173,7 @@ create_from_gl_buffer(clobj_t *ptr, clobj_t _ctx, return c_handle_error([&] { cl_mem mem = pyopencl_call_guarded(clCreateFromGLBuffer, ctx, flags, bufobj); - *ptr = pyopencl_convert_obj(gl_buffer, - clReleaseMemObject, mem); + *ptr = pyopencl_convert_obj(gl_buffer, clReleaseMemObject, mem); }); } @@ -194,27 +191,27 @@ create_from_gl_renderbuffer(clobj_t *ptr, clobj_t _ctx, } error* -enqueue_acquire_gl_objects(clobj_t *_evt, clobj_t queue, +enqueue_acquire_gl_objects(clobj_t *evt, clobj_t queue, const clobj_t *mem_objects, uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for) { return c_handle_error([&] { - *_evt = enqueue_gl_objects( - Acquire, static_cast(queue), + enqueue_gl_objects( + Acquire, evt, static_cast(queue), mem_objects, num_mem_objects, wait_for, num_wait_for); }); } error* -enqueue_release_gl_objects(clobj_t *event, clobj_t queue, +enqueue_release_gl_objects(clobj_t *evt, clobj_t queue, const clobj_t *mem_objects, uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for) { return c_handle_error([&] { - *event = enqueue_gl_objects( - Release, static_cast(queue), + enqueue_gl_objects( + Release, evt, static_cast(queue), mem_objects, num_mem_objects, wait_for, num_wait_for); }); } diff --git a/src/c_wrapper/image.cpp b/src/c_wrapper/image.cpp index 1ac7c3d578390914beb513dd127845f8c1fae053..986e792c1c4b8dbed7d8cb3436d10859009c8152 100644 --- a/src/c_wrapper/image.cpp +++ b/src/c_wrapper/image.cpp @@ -177,7 +177,7 @@ image__get_fill_type(clobj_t img) } error* -enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, +enqueue_read_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, const size_t *_origin, size_t origin_l, const size_t *_region, size_t region_l, void *buffer, size_t row_pitch, size_t slice_pitch, @@ -190,19 +190,18 @@ enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, ConstBuffer origin(_origin, origin_l); ConstBuffer region(_region, region_l); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueReadImage, queue, img, cast_bool(is_blocking), origin, region, row_pitch, - slice_pitch, buffer, wait_for, &evt); + slice_pitch, buffer, wait_for, + nanny_event_out(evt, pyobj)); }); - *_evt = new_nanny_event(evt, pyobj); }); } error* -enqueue_copy_image(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst, +enqueue_copy_image(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, const size_t *_src_origin, size_t src_origin_l, const size_t *_dst_origin, size_t dst_origin_l, const size_t *_region, size_t region_l, @@ -216,18 +215,16 @@ enqueue_copy_image(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst, ConstBuffer dst_origin(_dst_origin, dst_origin_l); ConstBuffer region(_region, region_l); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueCopyImage, queue, src, dst, src_origin, - dst_origin, region, wait_for, &evt); + dst_origin, region, wait_for, event_out(evt)); }); - *_evt = new_event(evt); }); } error* -enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, +enqueue_write_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, const size_t *_origin, size_t origin_l, const size_t *_region, size_t region_l, const void *buffer, size_t row_pitch, size_t slice_pitch, @@ -240,14 +237,13 @@ enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, ConstBuffer origin(_origin, origin_l); ConstBuffer region(_region, region_l); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueWriteImage, queue, img, cast_bool(is_blocking), origin, region, row_pitch, - slice_pitch, buffer, wait_for, &evt); + slice_pitch, buffer, wait_for, + nanny_event_out(evt, pyobj)); }); - *_evt = new_nanny_event(evt, pyobj); }); } diff --git a/src/c_wrapper/kernel.cpp b/src/c_wrapper/kernel.cpp index 5f9555e199c03c1ab829488d9b930cc804872d23..a8dec6049383fe415e7e8ec6c84b90b983c4a803 100644 --- a/src/c_wrapper/kernel.cpp +++ b/src/c_wrapper/kernel.cpp @@ -135,7 +135,7 @@ kernel__get_work_group_info(clobj_t _knl, cl_kernel_work_group_info param, } error* -enqueue_nd_range_kernel(clobj_t *_evt, clobj_t _queue, clobj_t _knl, +enqueue_nd_range_kernel(clobj_t *evt, clobj_t _queue, clobj_t _knl, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, @@ -145,30 +145,26 @@ enqueue_nd_range_kernel(clobj_t *_evt, clobj_t _queue, clobj_t _knl, auto knl = static_cast(_knl); const auto wait_for = buf_from_class(_wait_for, num_wait_for); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( clEnqueueNDRangeKernel, queue, knl, work_dim, global_work_offset, global_work_size, - local_work_size, wait_for, &evt); + local_work_size, wait_for, event_out(evt)); }); - *_evt = new_event(evt); }); } error* -enqueue_task(clobj_t *_evt, clobj_t _queue, clobj_t _knl, +enqueue_task(clobj_t *evt, clobj_t _queue, clobj_t _knl, const clobj_t *_wait_for, uint32_t num_wait_for) { auto queue = static_cast(_queue); auto knl = static_cast(_knl); const auto wait_for = buf_from_class(_wait_for, num_wait_for); return c_handle_error([&] { - cl_event evt; retry_mem_error([&] { pyopencl_call_guarded( - clEnqueueTask, queue, knl, wait_for, &evt); + clEnqueueTask, queue, knl, wait_for, event_out(evt)); }); - *_evt = new_event(evt); }); } diff --git a/src/c_wrapper/memory_map.cpp b/src/c_wrapper/memory_map.cpp index 2dbbe02b913a8b888532fa19f90aba8924c203fb..d82b571dfa4e584cf59d7a5ef4f56ee6bc67af4c 100644 --- a/src/c_wrapper/memory_map.cpp +++ b/src/c_wrapper/memory_map.cpp @@ -16,9 +16,9 @@ memory_map::~memory_map() m_mem, this, 0, nullptr, nullptr); } -event* -memory_map::release(const command_queue *queue, const clobj_t *_wait_for, - uint32_t num_wait_for) const +void +memory_map::release(clobj_t *evt, const command_queue *queue, + const clobj_t *_wait_for, uint32_t num_wait_for) const { if (!m_valid.exchange(false)) { throw clerror("MemoryMap.release", CL_INVALID_VALUE, @@ -26,10 +26,8 @@ memory_map::release(const command_queue *queue, const clobj_t *_wait_for, } const auto wait_for = buf_from_class(_wait_for, num_wait_for); queue = queue ? queue : &m_queue; - cl_event evt; pyopencl_call_guarded(clEnqueueUnmapMemObject, queue, - m_mem, this, wait_for, &evt); - return new_event(evt); + m_mem, this, wait_for, event_out(evt)); } generic_info @@ -44,6 +42,20 @@ memory_map::intptr() const return m_valid ? (intptr_t)data() : 0; } +memory_map* +convert_memory_map(clobj_t evt, command_queue *queue, + memory_object *buf, void *res) +{ + try { + return new memory_map(queue, buf, res); + } catch (...) { + delete evt; + pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, queue, + buf, res, 0, nullptr, nullptr); + throw; + } +} + } // c wrapper @@ -58,7 +70,7 @@ memory_map__release(clobj_t _map, clobj_t _queue, const clobj_t *_wait_for, auto map = static_cast(_map); auto queue = static_cast(_queue); return c_handle_error([&] { - *evt = map->release(queue, _wait_for, num_wait_for); + map->release(evt, queue, _wait_for, num_wait_for); }); } @@ -68,26 +80,8 @@ memory_map__data(clobj_t _map) return static_cast(_map)->data(); } -memory_map* -memory_map::convert(clobj_t *_evt, cl_event evt, command_queue *queue, - memory_object *buf, void *res) -{ - try { - *_evt = new event(evt, false); - evt = 0; - return new memory_map(queue, buf, res); - } catch (...) { - if (evt) { - pyopencl_call_guarded_cleanup(clReleaseEvent, evt); - } - pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, queue, - buf, res, 0, nullptr, nullptr); - throw; - } -} - error* -enqueue_map_image(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem, +enqueue_map_image(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, cl_map_flags flags, const size_t *_origin, size_t origin_l, const size_t *_region, size_t region_l, size_t *row_pitch, size_t *slice_pitch, const clobj_t *_wait_for, @@ -99,18 +93,18 @@ enqueue_map_image(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem, ConstBuffer origin(_origin, origin_l); ConstBuffer region(_region, region_l); return c_handle_error([&] { - cl_event evt; void *res = retry_mem_error([&] { return pyopencl_call_guarded( clEnqueueMapImage, queue, img, cast_bool(block), flags, - origin, region, row_pitch, slice_pitch, wait_for, &evt); + origin, region, row_pitch, slice_pitch, wait_for, + event_out(evt)); }); - *map = memory_map::convert(_evt, evt, queue, img, res); + *map = convert_memory_map(*evt, queue, img, res); }); } error* -enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem, +enqueue_map_buffer(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, cl_map_flags flags, size_t offset, size_t size, const clobj_t *_wait_for, uint32_t num_wait_for, int block) @@ -119,12 +113,11 @@ enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem, auto buf = static_cast(_mem); const auto wait_for = buf_from_class(_wait_for, num_wait_for); return c_handle_error([&] { - cl_event evt; void *res = retry_mem_error([&] { return pyopencl_call_guarded( clEnqueueMapBuffer, queue, buf, cast_bool(block), - flags, offset, size, wait_for, &evt); + flags, offset, size, wait_for, event_out(evt)); }); - *map = memory_map::convert(_evt, evt, queue, buf, res); + *map = convert_memory_map(*evt, queue, buf, res); }); } diff --git a/src/c_wrapper/memory_map.h b/src/c_wrapper/memory_map.h index 85d90917589e1e744eda976e2b0239f3bff0de4e..ac7bdc17dd597a7ee343dfa26c4ab9c6442d5809 100644 --- a/src/c_wrapper/memory_map.h +++ b/src/c_wrapper/memory_map.h @@ -24,12 +24,10 @@ public: : clobj(ptr), m_valid(true), m_queue(*queue), m_mem(*mem) {} ~memory_map(); - event *release(const command_queue *queue, const clobj_t *_wait_for, - uint32_t num_wait_for) const; + void release(clobj_t *evt, const command_queue *queue, + const clobj_t *wait_for, uint32_t num_wait_for) const; generic_info get_info(cl_uint) const; intptr_t intptr() const; - static memory_map *convert(clobj_t*, cl_event, command_queue *queue, - memory_object *buf, void *res); }; // }}} diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h index 241ef0af008844da52d18c46eba63d2ce8d4c018..f96e61997eb3335bf8dcdc8ec1ea4fae2ee47f90 100644 --- a/src/c_wrapper/utils.h +++ b/src/c_wrapper/utils.h @@ -181,6 +181,50 @@ public: } }; +class OutArg { +}; + +template +class CLArg::value>::type> { +private: + bool m_finished; + bool m_need_cleanup; + T &m_arg; +public: + CLArg(T &arg) + : m_finished(false), m_need_cleanup(true), m_arg(arg) + { + } + CLArg(CLArg &&other) noexcept + : m_finished(other.m_finished), m_need_cleanup(other.m_need_cleanup), + m_arg(other.m_arg) + {} + PYOPENCL_INLINE auto + convert() + -> decltype(m_arg.get()) + { + return m_arg.get(); + } + PYOPENCL_INLINE void + finish() + { + m_arg.finish(); + m_finished = true; + } + PYOPENCL_INLINE void + cleanup() + { + m_need_cleanup = false; + } + ~CLArg() + { + if (m_need_cleanup) { + m_arg.cleanup(m_finished); + } + } +}; + template struct _D { void operator()(T *p) {