From 717e79cbbb88b4402156b0f96bf9ac34070f43c7 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sat, 24 May 2014 13:12:06 -0400 Subject: [PATCH] use nanny_event --- TODOs | 1 - pyopencl/c_wrapper/wrap_cl_core.h | 9 +++++--- pyopencl/cffi_cl.py | 29 +++++++++++++++--------- src/c_wrapper/wrap_cl.cpp | 37 ++++++++++++++++++------------- 4 files changed, 46 insertions(+), 30 deletions(-) diff --git a/TODOs b/TODOs index 31a4d158..2f1f3fdb 100644 --- a/TODOs +++ b/TODOs @@ -1,4 +1,3 @@ -- nanny events - *_from_int_ptr, register with metaclass - subdevices - Kernel.get_arg_info diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h index af3088ba..5e17b0de 100644 --- a/pyopencl/c_wrapper/wrap_cl_core.h +++ b/pyopencl/c_wrapper/wrap_cl_core.h @@ -108,6 +108,8 @@ error *image__get_image_info(clobj_t image, cl_image_info param, error *event__get_profiling_info(clobj_t event, cl_profiling_info param, generic_info *out); error *event__wait(clobj_t event); +// Nanny Event +unsigned long nanny_event__get_ward(clobj_t evt); // enqueue_* error *enqueue_nd_range_kernel(clobj_t *event, clobj_t queue, clobj_t kernel, cl_uint work_dim, @@ -127,7 +129,7 @@ error *enqueue_barrier(clobj_t queue); error *enqueue_read_buffer(clobj_t *event, clobj_t queue, clobj_t mem, void *buffer, size_t size, size_t device_offset, const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking); + int is_blocking, void (*ref)(unsigned long)); error *enqueue_copy_buffer(clobj_t *event, clobj_t queue, clobj_t src, clobj_t dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, @@ -135,13 +137,14 @@ error *enqueue_copy_buffer(clobj_t *event, clobj_t queue, clobj_t src, error *enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem, const void *buffer, size_t size, size_t device_offset, const clobj_t *wait_for, - uint32_t num_wait_for, int is_blocking); + uint32_t num_wait_for, int is_blocking, + void (*ref)(unsigned long)); error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem, size_t *origin, size_t *region, void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking); + int is_blocking, void (*ref)(unsigned long)); // CL Object intptr_t clobj__int_ptr(clobj_t obj); error *clobj__get_info(clobj_t obj, cl_uint param, generic_info *out); diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index 9c286254..c6a3e5fb 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -32,7 +32,7 @@ import sys # TODO: can we do without ctypes? import ctypes -from pyopencl._cffi import _ffi, _lib +from pyopencl._cffi import _ffi, _lib, _get_insert_func, _find_obj # {{{ compatibility shims @@ -490,7 +490,7 @@ def _c_buffer_from_obj(obj, writable=False): # numpy array return (_ffi.cast('void *', obj.__array_interface__['data'][0]), - obj.nbytes, None) + obj.nbytes, obj) elif isinstance(obj, np.generic): if writable: raise TypeError('expected an object with a writable ' @@ -511,7 +511,7 @@ def _c_buffer_from_obj(obj, writable=False): # bytes is not writable raise TypeError('expected an object with a writable ' 'buffer interface.') - return (obj, len(obj), None) + return (obj, len(obj), obj) else: raise LogicError("", status_code.INVALID_VALUE, "PyOpencl on PyPy only accepts numpy arrays " @@ -539,7 +539,7 @@ def _c_buffer_from_obj(obj, writable=False): raise LogicError(routine=None, code=status_code.INVALID_VALUE, msg="un-sized (pure-Python) types not acceptable as arguments") - return _ffi.cast('void *', addr.value), length.value, None + return _ffi.cast('void *', addr.value), length.value, obj # }}} @@ -696,6 +696,10 @@ class Event(_Common): def wait(self): _handle_error(_lib.event__wait(self.ptr)) +class NannyEvent(Event): + def get_ward(self): + return _find_obj(_lib.nanny_event__get_ward(self.ptr)) + # TODO # NannyEvent # wait_for_events @@ -804,8 +808,9 @@ def _enqueue_read_buffer(queue, mem, hostbuf, device_offset=0, c_wait_for, num_wait_for = _c_obj_list(wait_for) _handle_error(_lib.enqueue_read_buffer( ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking))) - return _create_instance(Event, ptr_event[0]) + c_wait_for, num_wait_for, bool(is_blocking), + _get_insert_func(hostbuf))) + return _create_instance(NannyEvent, ptr_event[0]) def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, @@ -820,13 +825,14 @@ def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, wait_for=None, is_blocking=True): - c_buf, size, _ = _c_buffer_from_obj(hostbuf) + c_buf, size, c_ref = _c_buffer_from_obj(hostbuf) ptr_event = _ffi.new('void **') c_wait_for, num_wait_for = _c_obj_list(wait_for) _handle_error(_lib.enqueue_write_buffer( ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking))) - return _create_instance(Event, ptr_event[0]) + c_wait_for, num_wait_for, bool(is_blocking), + _get_insert_func(c_ref))) + return _create_instance(NannyEvent, ptr_event[0]) # }}} @@ -841,8 +847,9 @@ def _enqueue_read_image(queue, mem, origin, region, hostbuf, row_pitch=0, # TODO check buffer size _handle_error(_lib.enqueue_read_image( ptr_event, queue.ptr, mem.ptr, origin, region, c_buf, row_pitch, - slice_pitch, c_wait_for, num_wait_for, bool(is_blocking))) - return _create_instance(Event, ptr_event[0]) + slice_pitch, c_wait_for, num_wait_for, bool(is_blocking), + _get_insert_func(c_buf))) + return _create_instance(NannyEvent, ptr_event[0]) # TODO: write_image copy_image fill_image # copy_buffer_to_image copy_image_to_buffer diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp index 350d6f53..fff57435 100644 --- a/src/c_wrapper/wrap_cl.cpp +++ b/src/c_wrapper/wrap_cl.cpp @@ -752,6 +752,11 @@ public: finished(); } }; +static inline event* +new_event(cl_event evt) +{ + return pyopencl_convert_obj(event, clReleaseEvent, evt); +} class nanny_event : public event { private: @@ -785,15 +790,10 @@ public: python_deref(ward); } }; - static inline event* -new_event(cl_event evt, void (*reffunc)(unsigned long)=0) +new_nanny_event(cl_event evt, void (*reffunc)(unsigned long)) { - if (reffunc) { - return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, reffunc); - } else { - return pyopencl_convert_obj(event, clReleaseEvent, evt); - } + return pyopencl_convert_obj(nanny_event, clReleaseEvent, evt, reffunc); } // }}} @@ -2182,6 +2182,14 @@ event__wait(clobj_t evt) } +// Nanny Event +unsigned long +nanny_event__get_ward(clobj_t evt) +{ + return static_cast(evt)->get_ward(); +} + + // enqueue_* error* enqueue_nd_range_kernel(clobj_t *_evt, clobj_t _queue, clobj_t _knl, @@ -2261,7 +2269,7 @@ error* enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking) + int is_blocking, void (*ref)(unsigned long)) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2274,7 +2282,7 @@ enqueue_read_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, cast_bool(is_blocking), device_offset, size, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_event(evt); + *_evt = new_nanny_event(evt, ref); }); } @@ -2282,7 +2290,7 @@ error* enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, const void *buffer, size_t size, size_t device_offset, const clobj_t *_wait_for, uint32_t num_wait_for, - int is_blocking) + int is_blocking, void (*ref)(unsigned long)) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2295,8 +2303,7 @@ enqueue_write_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, cast_bool(is_blocking), device_offset, size, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_event(evt); - // PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, buffer); + *_evt = new_nanny_event(evt, ref); }); } @@ -2337,7 +2344,8 @@ error* enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, size_t *origin, size_t *region, void *buffer, size_t row_pitch, size_t slice_pitch, const clobj_t *_wait_for, - uint32_t num_wait_for, int is_blocking) + uint32_t num_wait_for, int is_blocking, + void (*ref)(unsigned long)) { auto wait_for = buf_from_class(_wait_for, num_wait_for); auto queue = static_cast(_queue); @@ -2351,8 +2359,7 @@ enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, size_t *origin, slice_pitch, buffer, num_wait_for, wait_for.get(), &evt); }); - *_evt = new_event(evt); - //PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, buffer); + *_evt = new_nanny_event(evt, ref); }); } -- GitLab