diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index f7ac7dccdade7dc84e3cc65bdc561274fc1e6738..a92683aca839b8684b9ad73a07a94d175163473f 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -493,9 +493,15 @@ def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, gl )) return _create_instance(Event, ptr_event[0]) +def _c_wait_for(wait_for=None): + if wait_for is None: + return _ffi.NULL, 0 + return _ffi.new('void *[]', [ev.ptr for ev in wait_for]), len(wait_for) + def _enqueue_read_buffer(queue, mem, buf, device_offset=0, wait_for=None, is_blocking=True): c_buf, size = Buffer._c_buffer_from_obj(buf) ptr_event = _ffi.new('void **') + c_wait_for, num_wait_for = _c_wait_for(wait_for=wait_for) _handle_error(_lib._enqueue_read_buffer( ptr_event, queue.ptr, @@ -503,12 +509,14 @@ def _enqueue_read_buffer(queue, mem, buf, device_offset=0, wait_for=None, is_blo c_buf, size, device_offset, + c_wait_for, num_wait_for, bool(is_blocking) )) return _create_instance(Event, ptr_event[0]) def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, dst_offset=0, wait_for=None): ptr_event = _ffi.new('void **') + c_wait_for, num_wait_for = _c_wait_for(wait_for=wait_for) _handle_error(_lib._enqueue_copy_buffer( ptr_event, queue.ptr, @@ -516,20 +524,23 @@ def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, dst_offse dst.ptr, byte_count, src_offset, - dst_offset + dst_offset, + c_wait_for, num_wait_for, )) return _create_instance(Event, ptr_event[0]) def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, wait_for=None, is_blocking=True): c_buf, size = Buffer._c_buffer_from_obj(hostbuf) ptr_event = _ffi.new('void **') - _handle_error(_lib._enqueue_read_buffer( + c_wait_for, num_wait_for = _c_wait_for(wait_for=wait_for) + _handle_error(_lib._enqueue_write_buffer( ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, + c_wait_for, num_wait_for, bool(is_blocking) )) return _create_instance(Event, ptr_event[0]) diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp index 27e9c2682392246f209d2d916fe81f52cc4bc124..fd58b944bb2520adc668472490c3f1c36278ffec 100644 --- a/src/c_wrapper/wrap_cl.cpp +++ b/src/c_wrapper/wrap_cl.cpp @@ -55,9 +55,6 @@ // }}} -#define PYOPENCL_WAITLIST_ARGS \ - num_events_in_wait_list, event_wait_list.empty( ) ? NULL : &event_wait_list.front() - #define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ { \ size_t size; \ @@ -147,7 +144,20 @@ info.value = (void*)ar; \ return info; \ } - + +// {{{ event helpers -------------------------------------------------------------- +#define PYOPENCL_PARSE_WAIT_FOR \ + std::vector<cl_event> event_wait_list(num_wait_for); \ + { \ + for(unsigned i = 0; i < num_wait_for; ++i) { \ + event_wait_list[i] = static_cast<pyopencl::event*>(wait_for[i])->data(); \ + } \ + } + +#define PYOPENCL_WAITLIST_ARGS \ + num_wait_for, event_wait_list.empty( ) ? NULL : &event_wait_list.front() + + #define PYOPENCL_RETURN_NEW_EVENT(evt) \ try \ { \ @@ -159,6 +169,7 @@ throw; \ } +// }}} // {{{ equality testing #define PYOPENCL_EQUALITY_TESTS(cls) \ @@ -1804,11 +1815,10 @@ generic_info get_info(cl_device_info param_name) const void *buffer, size_t size, size_t device_offset, - /*py::object py_wait_for,*/ + void **wait_for, uint32_t num_wait_for, bool is_blocking) { - // TODO - //PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_WAIT_FOR; cl_event evt; // TODO @@ -1818,8 +1828,7 @@ generic_info get_info(cl_device_info param_name) const mem.data(), PYOPENCL_CAST_BOOL(is_blocking), device_offset, size, buffer, - 0, NULL, - //PYOPENCL_WAITLIST_ARGS, + PYOPENCL_WAITLIST_ARGS, &evt )); //); @@ -1834,14 +1843,11 @@ generic_info get_info(cl_device_info param_name) const memory_object_holder &dst, ptrdiff_t byte_count, size_t src_offset, - size_t dst_offset - // , - /*py::object py_wait_for*/ + size_t dst_offset, + void **wait_for, uint32_t num_wait_for ) { - // TODO - // PYOPENCL_PARSE_WAIT_FOR; - + PYOPENCL_PARSE_WAIT_FOR; if (byte_count < 0) { size_t byte_count_src = 0; @@ -1860,7 +1866,7 @@ generic_info get_info(cl_device_info param_name) const src.data(), dst.data(), src_offset, dst_offset, byte_count, - 0, NULL, //PYOPENCL_WAITLIST_ARGS, + PYOPENCL_WAITLIST_ARGS, &evt )) // ); @@ -1874,10 +1880,10 @@ generic_info get_info(cl_device_info param_name) const void *buffer, size_t size, size_t device_offset, - /*py::object py_wait_for,*/ + void **wait_for, uint32_t num_wait_for, bool is_blocking) { - //PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_WAIT_FOR; cl_event evt; // TODO @@ -1887,7 +1893,7 @@ generic_info get_info(cl_device_info param_name) const mem.data(), PYOPENCL_CAST_BOOL(is_blocking), device_offset, size, buffer, - 0, NULL, //PYOPENCL_WAITLIST_ARGS, + PYOPENCL_WAITLIST_ARGS, &evt )); //); @@ -2156,30 +2162,30 @@ generic_info get_info(cl_device_info param_name) const } - ::error *_enqueue_read_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking) { - C_HANDLE_ERROR( - *ptr_event = enqueue_read_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue), - *static_cast<pyopencl::memory_object_holder*>(ptr_memory_object_holder), - buffer, size, device_offset, (bool)is_blocking); - ) - return 0; +::error *_enqueue_read_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, void **wait_for, uint32_t num_wait_for, int is_blocking) { + C_HANDLE_ERROR( + *ptr_event = enqueue_read_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue), + *static_cast<pyopencl::memory_object_holder*>(ptr_memory_object_holder), + buffer, size, device_offset, wait_for, num_wait_for, (bool)is_blocking); + ) + return 0; } - ::error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking) { + ::error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, void **wait_for, uint32_t num_wait_for, int is_blocking) { C_HANDLE_ERROR( *ptr_event = enqueue_write_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue), *static_cast<pyopencl::memory_object_holder*>(ptr_memory_object_holder), - buffer, size, device_offset, (bool)is_blocking); + buffer, size, device_offset, wait_for, num_wait_for, (bool)is_blocking); ) return 0; } -::error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset) { +::error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, void **wait_for, uint32_t num_wait_for) { C_HANDLE_ERROR( *ptr_event = enqueue_copy_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue), *static_cast<pyopencl::memory_object_holder*>(ptr_src), *static_cast<pyopencl::memory_object_holder*>(ptr_dst), - byte_count, src_offset, dst_offset); + byte_count, src_offset, dst_offset, wait_for, num_wait_for); ) return 0; } diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h index c2f777026366bbf4eebc74d85f43b169bc968edf..76fcb5825b04583050907d006e799bd4bf172a21 100644 --- a/src/c_wrapper/wrap_cl_core.h +++ b/src/c_wrapper/wrap_cl_core.h @@ -44,9 +44,9 @@ error *kernel__set_arg_mem_buffer(void *ptr_kernel, cl_uint arg_index, void *ptr long _hash(void *ptr_platform, class_t); error *_enqueue_nd_range_kernel(void **ptr_event, void *ptr_command_queue, void *ptr_kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size); -error *_enqueue_read_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking); -error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset); -error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking); +error *_enqueue_read_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, void **wait_for, uint32_t num_wait_for, int is_blocking); +error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, void **wait_for, uint32_t num_wait_for); +error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, void **wait_for, uint32_t num_wait_for, int is_blocking); void populate_constants(void(*add)(const char*, const char*, long value)); intptr_t _int_ptr(void*, class_t);