diff --git a/pyopencl/_cffi.py b/pyopencl/_cffi.py index 618415aac2bba83cb5221882d293ae5588793d72..97e2db24946b6d5467e886b01b69de77c2523710 100644 --- a/pyopencl/_cffi.py +++ b/pyopencl/_cffi.py @@ -168,30 +168,15 @@ _gc_collect = _ffi.callback('int(void)')(gc.collect) _lib.set_gc(_gc_collect) _pyrefs = {} -@_ffi.callback('void(unsigned long)') -def _py_deref(_id): +@_ffi.callback('void(void*)') +def _py_deref(handle): try: - del _pyrefs[_id] + del _pyrefs[handle] except: pass -def _get_ref_func(obj): - @_ffi.callback('void(unsigned long)') - def _insert(_id): - _pyrefs[_id] = obj - return _insert +@_ffi.callback('void(void*)') +def _py_ref(handle): + _pyrefs[handle] = handle -def _find_obj(_id): - return _pyrefs.get(_id, None) - -_lib.set_deref(_py_deref) - -import traceback -def _to_c_callback(func, *args, **kwargs): - @_ffi.callback('void()') - def _func(): - try: - func(*args, **kwargs) - except: - traceback.print_exc() - return _func +_lib.set_ref_funcs(_py_ref, _py_deref) diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h index 58649e62ab962fe686bac06f916c4aa804478f89..0480d0c84718d7b9c6bd1236e44c8f5b350305dc 100644 --- a/pyopencl/c_wrapper/wrap_cl_core.h +++ b/pyopencl/c_wrapper/wrap_cl_core.h @@ -43,7 +43,7 @@ int get_cl_version(); void free_pointer(void*); void free_pointer_array(void**, uint32_t size); void set_gc(int (*func)()); -void set_deref(void (*func)(unsigned long)); +void set_ref_funcs(void (*ref)(void*), void (*deref)(void*)); int have_gl(); unsigned bitlog2(unsigned long v); @@ -115,10 +115,10 @@ error *event__get_profiling_info(clobj_t event, cl_profiling_info param, generic_info *out); error *event__wait(clobj_t event); error *event__set_callback(clobj_t _evt, cl_int type, void (*cb)(cl_int), - void (*ref)(unsigned long)); + void *pyobj); error *wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for); // Nanny Event -unsigned long nanny_event__get_ward(clobj_t evt); +void *nanny_event__get_ward(clobj_t evt); // enqueue_* error *enqueue_nd_range_kernel(clobj_t *event, clobj_t queue, clobj_t kernel, cl_uint work_dim, @@ -143,7 +143,7 @@ error *enqueue_barrier(clobj_t queue); error *enqueue_read_buffer(clobj_t *event, clobj_t queue, clobj_t mem, void *buffer, size_t size, size_t device_offset, const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)); + int is_blocking, void *pyobj); error *enqueue_copy_buffer(clobj_t *event, clobj_t queue, clobj_t src, clobj_t dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, @@ -152,7 +152,7 @@ error *enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem, const void *buffer, size_t size, size_t device_offset, const clobj_t *wait_for, uint32_t num_wait_for, int is_blocking, - void (*ref)(unsigned long)); + void *pyobj); error *enqueue_map_buffer(clobj_t *_evt, clobj_t *mpa, clobj_t _queue, clobj_t _mem, cl_map_flags flags, size_t offset, size_t size, const clobj_t *_wait_for, @@ -167,14 +167,14 @@ error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem, const size_t *region, size_t region_l, void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)); + int is_blocking, void *pyobj); error *enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, const size_t *origin, size_t origin_l, const size_t *region, size_t region_l, const void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *_wait_for, uint32_t num_wait_for, int is_blocking, - void (*ref)(unsigned long)); + void *pyobj); // CL Object intptr_t clobj__int_ptr(clobj_t obj); error *clobj__get_info(clobj_t obj, cl_uint param, generic_info *out); diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index 846aab5eab347f60f267c5da5c332a15d9a74609..72dc0ab473ab653abdb379b8596117f54be18030 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -32,7 +32,7 @@ import sys # TODO: can we do without ctypes? import ctypes -from pyopencl._cffi import _ffi, _lib, _get_ref_func, _find_obj, _to_c_callback +from pyopencl._cffi import _ffi, _lib from .compyte.array import f_contiguous_strides, c_contiguous_strides # {{{ compatibility shims @@ -692,7 +692,7 @@ class Event(_Common): def _func(status): cb(status, *args, **kwargs) _handle_error(_lib.event__set_callback(self.ptr, _type, _func, - _get_ref_func(_func))) + _ffi.new_handle(_func))) def wait_for_events(wait_for): _handle_error(_lib.wait_for_events(*_clobj_list(wait_for))) @@ -701,7 +701,7 @@ class NannyEvent(Event): # TODO disable/handle write to buffer from bytes since the data may be moved # by GC def get_ward(self): - return _find_obj(_lib.nanny_event__get_ward(self.ptr)) + return _ffi.from_handle(_lib.nanny_event__get_ward(self.ptr)) # TODO # UserEvent @@ -827,7 +827,7 @@ def _enqueue_read_buffer(queue, mem, hostbuf, device_offset=0, _handle_error(_lib.enqueue_read_buffer( ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, c_wait_for, num_wait_for, bool(is_blocking), - _get_ref_func(hostbuf))) + _ffi.new_handle(hostbuf))) return _create_instance(NannyEvent, ptr_event[0]) @@ -849,7 +849,7 @@ def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, _handle_error(_lib.enqueue_write_buffer( ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, c_wait_for, num_wait_for, bool(is_blocking), - _get_ref_func(c_ref))) + _ffi.new_handle(c_ref))) return _create_instance(NannyEvent, ptr_event[0]) # PyPy bug report: https://bitbucket.org/pypy/pypy/issue/1777/unable-to-create-proper-numpy-array-from @@ -918,7 +918,7 @@ def _enqueue_read_image(queue, mem, origin, region, hostbuf, row_pitch=0, _handle_error(_lib.enqueue_read_image( ptr_event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), _get_ref_func(c_buf))) + bool(is_blocking), _ffi.new_handle(c_buf))) return _create_instance(NannyEvent, ptr_event[0]) def _enqueue_write_image(queue, mem, origin, region, hostbuf, row_pitch=0, @@ -935,7 +935,7 @@ def _enqueue_write_image(queue, mem, origin, region, hostbuf, row_pitch=0, _handle_error(_lib.enqueue_read_image( _event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), _get_ref_func(c_buf))) + bool(is_blocking), _ffi.new_handle(c_buf))) return _create_instance(NannyEvent, _event[0]) # TODO: write_image copy_image fill_image diff --git a/src/c_wrapper/pyhelper.cpp b/src/c_wrapper/pyhelper.cpp index cc490c30bc47b2419bbf41a84da71e838127ef81..d76f7ac60bfe02d0372327989de0395658f0a817 100644 --- a/src/c_wrapper/pyhelper.cpp +++ b/src/c_wrapper/pyhelper.cpp @@ -4,17 +4,6 @@ namespace pyopencl { -static std::atomic pyobj_id = ATOMIC_VAR_INIT(1ul); -unsigned long -next_obj_id() -{ - unsigned long id; - do { - id = std::atomic_fetch_add(&pyobj_id, 1ul); - } while (id == 0); - return id; -} - static int dummy_python_gc() { @@ -22,12 +11,13 @@ dummy_python_gc() } static void -dummy_python_deref(unsigned long) +dummy_python_ref_func(void*) { } int (*python_gc)() = dummy_python_gc; -void (*python_deref)(unsigned long) = dummy_python_deref; +void (*python_deref)(void*) = dummy_python_ref_func; +void (*python_ref)(void*) = dummy_python_ref_func; } @@ -38,7 +28,8 @@ set_gc(int (*func)()) } void -set_deref(void (*func)(unsigned long)) +set_ref_funcs(void (*ref)(void*), void (*deref)(void*)) { - pyopencl::python_deref = func ? func : pyopencl::dummy_python_deref; + pyopencl::python_ref = ref ? ref : pyopencl::dummy_python_ref_func; + pyopencl::python_deref = deref ? deref : pyopencl::dummy_python_ref_func; } diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h index adc414db31c7cfd6fa64bc3c79c96d5fc2ca12d3..6114e69a63328ed91abec846d845c64909e8731c 100644 --- a/src/c_wrapper/utils.h +++ b/src/c_wrapper/utils.h @@ -307,8 +307,8 @@ get_int_info(cl_int (*func)(ArgTypes...), const char *name, // }}} -unsigned long next_obj_id(); -extern void (*python_deref)(unsigned long); +extern void (*python_ref)(void*); +extern void (*python_deref)(void*); } diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp index cf3d56b4b6550e964cc94e8d612e259c48ca8736..f38f8a5280f2cbf974f588bdcbedd30cfd80eb15 100644 --- a/src/c_wrapper/wrap_cl.cpp +++ b/src/c_wrapper/wrap_cl.cpp @@ -802,14 +802,13 @@ new_event(cl_event evt) class nanny_event : public event { private: - unsigned int m_ward; + void *m_ward; public: - nanny_event(cl_event evt, bool retain, void (*reffunc)(unsigned long)=0) - : event(evt, retain), m_ward(0) + nanny_event(cl_event evt, bool retain, void *ward=NULL) + : event(evt, retain), m_ward(ward) { - if (reffunc) { - m_ward = next_obj_id(); - reffunc(m_ward); + if (ward) { + python_ref(ward); } } ~nanny_event() @@ -818,7 +817,7 @@ public: wait(); } } - unsigned int + void* get_ward() const { return m_ward; @@ -827,15 +826,15 @@ public: finished() { // No lock needed because multiple release is safe here. - unsigned long ward = m_ward; - m_ward = 0; + void *ward = m_ward; + m_ward = NULL; python_deref(ward); } }; static inline event* -new_nanny_event(cl_event evt, void (*reffunc)(unsigned long)) +new_nanny_event(cl_event evt, void *ward) { - return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, reffunc); + return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, ward); } // }}} @@ -2313,18 +2312,17 @@ event__wait(clobj_t evt) } #if PYOPENCL_CL_VERSION >= 0x1010 +// TODO directly use pyobj to do callback error* -event__set_callback(clobj_t _evt, cl_int type, void (*cb)(cl_int), - void (*ref)(unsigned long)) +event__set_callback(clobj_t _evt, cl_int type, void (*cb)(cl_int), void *pyobj) { auto evt = static_cast(_evt); return c_handle_error([&] { - unsigned long obj_id = next_obj_id(); evt->set_callback(type, [=] (cl_int status) { cb(status); - python_deref(obj_id); + python_deref(pyobj); }); - ref(obj_id); + python_ref(pyobj); }); } #endif @@ -2340,7 +2338,7 @@ wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for) } // Nanny Event -unsigned long +void* nanny_event__get_ward(clobj_t evt) { return static_cast(evt)->get_ward(); @@ -2456,7 +2454,7 @@ error* enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)) + int is_blocking, void *pyobj) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2469,7 +2467,7 @@ enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, cast_bool(is_blocking), device_offset, size, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_nanny_event(evt, ref); + *_evt = new_nanny_event(evt, pyobj); }); } @@ -2477,7 +2475,7 @@ error* enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, const void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)) + int is_blocking, void *pyobj) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2490,7 +2488,7 @@ enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, cast_bool(is_blocking), device_offset, size, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_nanny_event(evt, ref); + *_evt = new_nanny_event(evt, pyobj); }); } @@ -2589,7 +2587,7 @@ enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, const size_t *region, size_t region_l, void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)) + int is_blocking, void *pyobj) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2613,7 +2611,7 @@ enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, slice_pitch, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_nanny_event(evt, ref); + *_evt = new_nanny_event(evt, pyobj); }); } @@ -2623,7 +2621,7 @@ enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, const size_t *region, size_t region_l, const void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking, void (*ref)(unsigned long)) + int is_blocking, void *pyobj) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2647,7 +2645,7 @@ enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, slice_pitch, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_nanny_event(evt, ref); + *_evt = new_nanny_event(evt, pyobj); }); }