diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h
index 97b8c2ef0d1d45187469fabf9cb01a2e95f91d05..04c6267aedb372befef912d8048d85e431433d95 100644
--- a/pyopencl/c_wrapper/wrap_cl_core.h
+++ b/pyopencl/c_wrapper/wrap_cl_core.h
@@ -69,6 +69,12 @@ error *create_buffer(clobj_t *buffer, clobj_t context, cl_mem_flags flags,
                      size_t size, void *hostbuf);
 // Memory Object
 error *memory_object__release(clobj_t obj);
+// Memory Map: release enqueues the unmap (NULL _queue => the map's own queue); data/size give NULL/0 once released
+error *memory_map__release(clobj_t _map, clobj_t _queue,
+                           const clobj_t *_wait_for, uint32_t num_wait_for,
+                           clobj_t *evt);
+void *memory_map__data(clobj_t _map);
+size_t memory_map__size(clobj_t _map);
 // Program
 error *create_program_with_source(clobj_t *program, clobj_t context,
                                   const char *src);
@@ -142,6 +148,10 @@ error *enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem,
                             size_t device_offset, const clobj_t *wait_for,
                             uint32_t num_wait_for, int is_blocking,
                             void (*ref)(unsigned long));
+error *enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue,  // *_evt / *map are outputs: map event and new memory map
+                          clobj_t _mem, cl_map_flags flags, size_t offset,
+                          size_t size, const clobj_t *_wait_for,
+                          uint32_t num_wait_for, int block);
 error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem,
                           size_t *origin, size_t *region,
                           void *buffer,
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 3e29e7afa706fd9a64ea6cdf10177648179778d2..b802c6b7eb488bf92d44cd5afad3a6c6ad2b8a98 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -33,6 +33,7 @@ import sys
 # TODO: can we do without ctypes?
 import ctypes
 from pyopencl._cffi import _ffi, _lib, _get_ref_func, _find_obj, _to_c_callback
+from .compyte.array import f_contiguous_strides, c_contiguous_strides
 
 # {{{ compatibility shims
 
@@ -459,6 +460,15 @@ class MemoryObject(MemoryObjectHolder):
         _handle_error(_lib.memory_object__release(self.ptr))
     # TODO hostbuf?
 
+class MemoryMap(_Common):  # Python handle for a mapped region created by enqueue_map_buffer
+    def release(self, queue=None, wait_for=None):  # enqueue the unmap; returns the resulting Event
+        c_wait_for, num_wait_for = _clobj_list(wait_for)
+        _event = _ffi.new('clobj_t*')
+        _handle_error(_lib.memory_map__release(
+            self.ptr, queue.ptr if queue is not None else _ffi.NULL,  # NULL => C side uses the map's own queue
+            c_wait_for, num_wait_for, _event))
+        return _create_instance(Event, _event[0])
+
 def _c_buffer_from_obj(obj, writable=False):
     """Convert a Python object to a tuple (cdata('void *'), num_bytes, dummy)
     to be able to pass a data stream to a C function. The dummy variable exists
@@ -827,6 +837,44 @@ def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0,
         _get_ref_func(c_ref)))
     return _create_instance(NannyEvent, ptr_event[0])
 
+# PyPy bug report: https://bitbucket.org/pypy/pypy/issue/1777/unable-to-create-proper-numpy-array-from
+def enqueue_map_buffer(queue, buf, flags, offset, shape, dtype,
+                       order="C", strides=None, wait_for=None,
+                       is_blocking=True):  # returns (ndarray built over the mapped memory, Event)
+    dtype = np.dtype(dtype)
+    if not isinstance(shape, tuple):
+        try:
+            shape = tuple(shape)
+        except TypeError:  # not iterable: treat a scalar as a 1-d shape
+            shape = (shape,)
+    byte_size = dtype.itemsize
+    if strides is None:
+        if order in ("c", "C"):  # row-major layout
+            strides = c_contiguous_strides(byte_size, shape)
+        elif order in ("f", "F"):  # column-major layout
+            strides = f_contiguous_strides(byte_size, shape)
+        else:
+            raise RuntimeError("unrecognized order specifier %s" % order,
+                               'enqueue_map_buffer')
+    for s in shape:  # bytes to map = itemsize * product of the shape
+        byte_size *= s
+    c_wait_for, num_wait_for = _clobj_list(wait_for)
+    _event = _ffi.new('clobj_t*')
+    _map = _ffi.new('clobj_t*')
+    _handle_error(_lib.enqueue_map_buffer(_event, _map, queue.ptr, buf.ptr,
+                                          flags, offset, byte_size, c_wait_for,
+                                          num_wait_for, bool(is_blocking)))
+    event = _create_instance(Event, _event[0])
+    mem_map = _create_instance(MemoryMap, _map[0])  # renamed from `map` to stop shadowing the builtin
+    mem_map.__array_interface__ = {  # lets np.asarray() build the array over the mapped pointer
+        'shape': shape,
+        'typestr': dtype.str,
+        'strides': strides,
+        'data': (int(_lib.clobj__int_ptr(_map[0])), False),  # (address, read_only flag)
+        'version': 3
+    }
+    return np.asarray(mem_map), event
+
 # }}}
 
 
diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h
index 
34b8571c8556f7b42b5c7a6d4cef50e5bc0bbb86..f1f3c3ff5e325cb06b7ce1c6a8f0b423d7371f92 100644
--- a/src/c_wrapper/error.h
+++ b/src/c_wrapper/error.h
@@ -4,6 +4,7 @@
 #include <iostream>
 #include <utility>
 #include <functional>
+#include <atomic>
 
 #ifndef __PYOPENCL_ERROR_H
 #define __PYOPENCL_ERROR_H
diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp
index e015b22d95856c983f51d5fc4fd7354d19731fda..eca8f5991b8452b4fbc5124f0fd25dd7a3e024c8 100644
--- a/src/c_wrapper/wrap_cl.cpp
+++ b/src/c_wrapper/wrap_cl.cpp
@@ -630,6 +630,9 @@ public:
                   cl_command_queue_properties props=0)
         : clobj(create_cl_command_queue(ctx, py_dev, props))
     {}
+    command_queue(const command_queue &queue)  // copy: delegates with (handle, true) -- presumably retain=true, confirm
+        : command_queue(queue.data(), true)
+    {}
     ~command_queue()
     {
         pyopencl_call_guarded_cleanup(clReleaseCommandQueue, data());
@@ -901,7 +904,7 @@ public:
 
 class memory_object : public memory_object_holder {
 private:
-    mutable bool m_valid;
+    mutable std::atomic_bool m_valid;  // atomic flag; `volatile` dropped, it adds nothing to std::atomic
     void *m_hostbuf;
 public:
     memory_object(cl_mem mem, bool retain, void *hostbuf=0)
@@ -914,20 +917,23 @@ public:
             m_hostbuf = hostbuf;
         }
     }
-    void
-    release() const
+    memory_object(const memory_object &mem)  // copy: new wrapper for the same cl_mem with retain=true, no hostbuf
+        : memory_object(mem.data(), true)
+    {}
+    ~memory_object()
     {
-        if (!m_valid)
-            throw clerror("MemoryObject.free", CL_INVALID_VALUE,
-                          "trying to double-unref mem object");
+        if (!m_valid.exchange(false))  // already released explicitly: nothing left to do
+            return;
         pyopencl_call_guarded_cleanup(clReleaseMemObject, data());
-        m_valid = false;
     }
-    ~memory_object()
+    void
+    release() const
     {
-        if (m_valid) {
-            release();
+        if (!m_valid.exchange(false)) {  // flag is cleared before the CL call, so a second release() throws
+            throw clerror("MemoryObject.release", CL_INVALID_VALUE,
+                          "trying to double-unref mem object");
         }
+        pyopencl_call_guarded(clReleaseMemObject, data());
     }
     void*
     hostbuf() const
@@ -1536,6 +1542,67 @@ new_buffer(cl_mem mem, void *buff)
 
 // }}}
 
+// {{{ memory_map
+
+class memory_map : public clbase {  // mapped region of a memory object; unmapped by release() or the destructor
+private:
+    mutable std::atomic_bool m_valid;  // true until unmapped; exchange(false) lets only one path unmap
+    command_queue m_queue;  // copy of the queue passed to the constructor
+    memory_object m_mem;  // copy of the memory object passed to the constructor
+    void *m_ptr;  // pointer passed to the constructor (the mapped address)
+    size_t m_size;  // size passed to the constructor
+public:
+    memory_map(const command_queue *queue, const memory_object *mem,
+               void *ptr, size_t size)
+        : m_valid(true), m_queue(*queue), m_mem(*mem), m_ptr(ptr), m_size(size)
+    {}
+    ~memory_map()
+    {
+        if (!m_valid.exchange(false))  // release() already ran
+            return;
+        pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, m_queue.data(),
+                                      m_mem.data(), m_ptr, 0, NULL, NULL);
+    }
+    event*
+    release(const command_queue *queue, const clobj_t *_wait_for,
+            uint32_t num_wait_for) const
+    {
+        if (!m_valid.exchange(false)) {  // second call: report instead of unmapping twice
+            throw clerror("MemoryMap.release", CL_INVALID_VALUE,
+                          "trying to double-unref mem map");
+        }
+        auto wait_for = buf_from_class<event>(_wait_for, num_wait_for);
+        queue = queue ? queue : &m_queue;  // NULL queue => use the queue the map was created with
+        cl_event evt;
+        pyopencl_call_guarded(clEnqueueUnmapMemObject, queue->data(),
+                              m_mem.data(), m_ptr, num_wait_for,
+                              wait_for.get(), &evt);
+        return new_event(evt);
+    }
+    intptr_t
+    intptr() const
+    {
+        return (intptr_t)data();  // mapped address as an integer (0 once released)
+    }
+    generic_info
+    get_info(cl_uint) const
+    {
+        throw clerror("MemoryMap.get_info", CL_INVALID_VALUE);  // always throws: no info queries for maps
+    }
+    void*
+    data() const
+    {
+        return m_valid ? m_ptr : NULL;
+    }
+    size_t
+    size() const
+    {
+        return m_valid ? m_size : 0;
+    }
+};
+
+// }}}
+
 // {{{ sampler
 
 class sampler : public clobj<cl_sampler> {
@@ -2023,6 +2090,31 @@ memory_object__release(clobj_t obj)
     });
 }
 
+
+// Memory Map
+error*
+memory_map__release(clobj_t _map, clobj_t _queue, const clobj_t *_wait_for,  // C entry point for memory_map::release
+                    uint32_t num_wait_for, clobj_t *evt)
+{
+    auto map = static_cast<memory_map*>(_map);
+    auto queue = static_cast<command_queue*>(_queue);  // may be NULL; release() then uses the map's queue
+    return c_handle_error([&] {
+            *evt = map->release(queue, _wait_for, num_wait_for);
+        });
+}
+
+void*
+memory_map__data(clobj_t _map)  // mapped pointer, or NULL once released
+{
+    return static_cast<memory_map*>(_map)->data();
+}
+
+size_t
+memory_map__size(clobj_t _map)  // mapped size, or 0 once released
+{
+    return static_cast<memory_map*>(_map)->size();
+}
+
 // Program
 error*
 create_program_with_source(clobj_t *prog, clobj_t _ctx, const char *src)
@@ -2404,6 +2496,38 @@ enqueue_copy_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst,
     });
 }
 
+error*
+enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem,  // outputs: *_evt (event), *map (memory_map)
+                   cl_map_flags flags, size_t offset, size_t size,
+                   const clobj_t *_wait_for, uint32_t num_wait_for,
+                   int block)
+{
+    auto wait_for = buf_from_class<event>(_wait_for, num_wait_for);
+    auto queue = static_cast<command_queue*>(_queue);
+    auto buf = static_cast<buffer*>(_mem);
+    return c_handle_error([&] {
+            cl_event evt;
+            void *res = retry_mem_error<void*>([&] {
+                    return pyopencl_call_guarded(
+                        clEnqueueMapBuffer, queue->data(), buf->data(),
+                        cast_bool(block), flags, offset, size, num_wait_for,
+                        wait_for.get(), &evt);
+                });
+            try {
+                *_evt = new event(evt, false);
+                evt = 0;  // handle now belongs to the event wrapper; skip the manual release below
+                *map = new memory_map(queue, buf, res, size);  // NOTE(review): if this throws, *_evt is not freed here -- confirm ownership
+            } catch (...) {
+                if (evt) {  // event wrapper was never created: release the raw handle
+                    pyopencl_call_guarded_cleanup(clReleaseEvent, evt);
+                }
+                pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject,  // undo the mapping before rethrowing
+                                              queue->data(), buf->data(),
+                                              res, 0, NULL, NULL);
+                throw;
+            }
+        });
+}
 
 error*
 enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, size_t *origin,