diff --git a/src/c_wrapper/event.cpp b/src/c_wrapper/event.cpp index c59fb3fc41848c1e925b09d2d6b393548ee30c3d..8104d9aecc437c0edc490e8dc871db37ee1038ef 100644 --- a/src/c_wrapper/event.cpp +++ b/src/c_wrapper/event.cpp @@ -4,6 +4,9 @@ #include "async.h" #include "pyhelper.h" +#include <atomic> +#include <thread> + namespace pyopencl { template class clobj<cl_event>; @@ -12,14 +15,37 @@ template void print_clobj<event>(std::ostream&, const event*); template void print_buf<cl_event>(std::ostream&, const cl_event*, size_t, ArgType, bool, bool); +class event_private { + mutable volatile std::atomic_bool m_finished; + virtual void + finish() noexcept + {} +public: + virtual + ~event_private() + {} + void + call_finish() noexcept + { + if (m_finished.exchange(true)) + return; + finish(); + } + bool + is_finished() noexcept + { + return m_finished; + } +}; + #if PYOPENCL_CL_VERSION >= 0x1010 class event_callback { std::function<void(cl_int)> m_func; - event_callback(const std::function<void(cl_int)> &func) + event_callback(const std::function<void(cl_int)> &func) noexcept : m_func(func) {} static void - cl_call_and_free(cl_event, cl_int status, void *data) + cl_call_and_free(cl_event, cl_int status, void *data) noexcept { auto cb = static_cast<event_callback*>(data); auto func = cb->m_func; @@ -34,8 +60,53 @@ class event_callback { }; #endif +event::event(cl_event event, bool retain, event_private *p) + : clobj(event), m_p(p) +{ + if (retain) { + try { + pyopencl_call_guarded(clRetainEvent, this); + } catch (...) { + delete m_p; + throw; + } + } +} + +void +event::release_private() noexcept +{ + if (!m_p) + return; + if (m_p->is_finished()) { + delete m_p; + return; + } +#if PYOPENCL_CL_VERSION >= 0x1010 + if (support_cb) { + pyopencl_call_guarded_cleanup(clSetEventCallback, this, CL_COMPLETE, + [] (cl_event, cl_int, void *data) { + event_private *p = + static_cast<event_private*>(data); + p->call_finish(); + delete p; + }, (void*)m_p); + } else { +#endif + std::thread t([] (cl_event evt, event_private *p) { + pyopencl_call_guarded_cleanup(clWaitForEvents, len_arg(evt)); + p->call_finish(); + delete p; + }, data(), m_p); + t.detach(); +#if PYOPENCL_CL_VERSION >= 0x1010 + } +#endif +} + event::~event() { + release_private(); pyopencl_call_guarded_cleanup(clReleaseEvent, this); } @@ -77,10 +148,12 @@ event::get_profiling_info(cl_profiling_info param) const } void -event::wait() +event::wait() const { pyopencl_call_guarded(clWaitForEvents, len_arg(data())); - finished(); + if (m_p) { + m_p->call_finish(); + } } #if PYOPENCL_CL_VERSION >= 0x1010 @@ -99,20 +172,39 @@ event::set_callback(cl_int type, const std::function<void(cl_int)> &func) } #endif -nanny_event::~nanny_event() -{ - if (m_ward) { - wait(); +class nanny_event_private : public event_private { + void *m_ward; + void finished() noexcept + { + void *ward = m_ward; + m_ward = nullptr; + py::deref(ward); } + ~nanny_event_private() + {} +public: + nanny_event_private(void *ward) + : m_ward(nullptr) + { + m_ward = py::ref(ward); + } + PYOPENCL_USE_RESULT PYOPENCL_INLINE void* + get_ward() const noexcept + { + return m_ward; + } +}; + +nanny_event::nanny_event(cl_event evt, bool retain, void *ward) + : event(evt, retain, ward ? new nanny_event_private(ward) : nullptr) +{ } -void -nanny_event::finished() +PYOPENCL_USE_RESULT void* +nanny_event::get_ward() const noexcept { - // No lock needed because multiple release is safe here. - void *ward = m_ward; - m_ward = nullptr; - py::deref(ward); + return (get_p() ? static_cast<nanny_event_private*>(get_p())->get_ward() : + nullptr); } } diff --git a/src/c_wrapper/event.h b/src/c_wrapper/event.h index 17524b96ffe6e9556ead076f312ef70058006880..68ce507e6c70ef495b3d5cf33af79dc392c802c3 100644 --- a/src/c_wrapper/event.h +++ b/src/c_wrapper/event.h @@ -12,26 +12,27 @@ extern template void print_arg<cl_event>(std::ostream&, const cl_event&, bool); extern template void print_buf<cl_event>(std::ostream&, const cl_event*, size_t, ArgType, bool, bool); +class event_private; + class event : public clobj<cl_event> { -public: - PYOPENCL_DEF_CL_CLASS(EVENT); - PYOPENCL_INLINE - event(cl_event event, bool retain) - : clobj(event) + event_private *m_p; + void release_private() noexcept; +protected: + PYOPENCL_INLINE event_private* + get_p() const { - if (retain) { - pyopencl_call_guarded(clRetainEvent, this); - } + return m_p; } +public: + PYOPENCL_DEF_CL_CLASS(EVENT); + event(cl_event event, bool retain, event_private *p=nullptr); ~event(); generic_info get_info(cl_uint param) const; PYOPENCL_USE_RESULT generic_info get_profiling_info(cl_profiling_info param) const; - virtual void - finished() - {} - void wait(); + void wait() const; #if PYOPENCL_CL_VERSION >= 0x1010 + bool support_cb; void set_callback(cl_int type, const std::function<void(cl_int)> &func); #endif }; @@ -44,23 +45,9 @@ event_out(clobj_t *ret) -> decltype(pyopencl_outarg(event, ret, clReleaseEvent)) extern template void print_clobj<event>(std::ostream&, const event*); class nanny_event : public event { -private: - void *m_ward; public: - nanny_event(cl_event evt, bool retain, void *ward=nullptr) - : event(evt, retain), m_ward(nullptr) - { - if (ward) { - m_ward = py::ref(ward); - } - } - ~nanny_event(); - PYOPENCL_USE_RESULT PYOPENCL_INLINE void* - get_ward() const - { - return m_ward; - } - void finished(); + nanny_event(cl_event evt, bool retain, void *ward=nullptr); + PYOPENCL_USE_RESULT void *get_ward() const noexcept; }; static PYOPENCL_INLINE auto nanny_event_out(clobj_t *ret, void *ward)