From 49ab696f914b6ffa205c077a6484882514733237 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 6 Aug 2018 12:11:52 -0500 Subject: [PATCH 01/92] Delete cffi bits --- .gitmodules | 6 +- cffi_build.py.in | 86 ---- cl_gl_types.h | 12 - cl_types.h | 128 ----- src/c_wrapper/bitlog.cpp | 59 --- src/c_wrapper/buffer.cpp | 235 --------- src/c_wrapper/buffer.h | 27 - src/c_wrapper/clhelper.h | 254 ---------- src/c_wrapper/clinfo_ext.h | 129 ----- src/c_wrapper/clobj.h | 149 ------ src/c_wrapper/command_queue.cpp | 132 ----- src/c_wrapper/command_queue.h | 64 --- src/c_wrapper/context.cpp | 153 ------ src/c_wrapper/context.h | 34 -- src/c_wrapper/debug.cpp | 84 ---- src/c_wrapper/debug.h | 33 -- src/c_wrapper/device.cpp | 375 -------------- src/c_wrapper/device.h | 61 --- src/c_wrapper/error.h | 337 ------------- src/c_wrapper/event.cpp | 294 ----------- src/c_wrapper/event.h | 87 ---- src/c_wrapper/function.h | 121 ----- src/c_wrapper/gl_obj.cpp | 155 ------ src/c_wrapper/gl_obj.h | 46 -- src/c_wrapper/image.cpp | 237 --------- src/c_wrapper/image.h | 50 -- src/c_wrapper/kernel.cpp | 213 -------- src/c_wrapper/kernel.h | 44 -- src/c_wrapper/memory_map.cpp | 115 ----- src/c_wrapper/memory_map.h | 37 -- src/c_wrapper/memory_object.cpp | 116 ----- src/c_wrapper/memory_object.h | 56 --- src/c_wrapper/mingw-std-threads | 1 - src/c_wrapper/platform.cpp | 109 ---- src/c_wrapper/platform.h | 27 - src/c_wrapper/program.cpp | 269 ---------- src/c_wrapper/program.h | 58 --- src/c_wrapper/pyhelper.cpp | 18 - src/c_wrapper/pyhelper.h | 43 -- src/c_wrapper/pyopencl_ext.h | 58 --- src/c_wrapper/sampler.cpp | 54 -- src/c_wrapper/sampler.h | 33 -- src/c_wrapper/svm.cpp | 173 ------- src/c_wrapper/svm.h | 4 - src/c_wrapper/utils.h | 551 -------------------- src/c_wrapper/wrap_cl.cpp | 123 ----- src/c_wrapper/wrap_cl.h | 171 ------- src/c_wrapper/wrap_cl_core.h | 399 --------------- src/c_wrapper/wrap_cl_gl_core.h | 18 - src/c_wrapper/wrap_constants.cpp | 827 
------------------------------- 50 files changed, 3 insertions(+), 6862 deletions(-) delete mode 100644 cffi_build.py.in delete mode 100644 cl_gl_types.h delete mode 100644 cl_types.h delete mode 100644 src/c_wrapper/bitlog.cpp delete mode 100644 src/c_wrapper/buffer.cpp delete mode 100644 src/c_wrapper/buffer.h delete mode 100644 src/c_wrapper/clhelper.h delete mode 100644 src/c_wrapper/clinfo_ext.h delete mode 100644 src/c_wrapper/clobj.h delete mode 100644 src/c_wrapper/command_queue.cpp delete mode 100644 src/c_wrapper/command_queue.h delete mode 100644 src/c_wrapper/context.cpp delete mode 100644 src/c_wrapper/context.h delete mode 100644 src/c_wrapper/debug.cpp delete mode 100644 src/c_wrapper/debug.h delete mode 100644 src/c_wrapper/device.cpp delete mode 100644 src/c_wrapper/device.h delete mode 100644 src/c_wrapper/error.h delete mode 100644 src/c_wrapper/event.cpp delete mode 100644 src/c_wrapper/event.h delete mode 100644 src/c_wrapper/function.h delete mode 100644 src/c_wrapper/gl_obj.cpp delete mode 100644 src/c_wrapper/gl_obj.h delete mode 100644 src/c_wrapper/image.cpp delete mode 100644 src/c_wrapper/image.h delete mode 100644 src/c_wrapper/kernel.cpp delete mode 100644 src/c_wrapper/kernel.h delete mode 100644 src/c_wrapper/memory_map.cpp delete mode 100644 src/c_wrapper/memory_map.h delete mode 100644 src/c_wrapper/memory_object.cpp delete mode 100644 src/c_wrapper/memory_object.h delete mode 160000 src/c_wrapper/mingw-std-threads delete mode 100644 src/c_wrapper/platform.cpp delete mode 100644 src/c_wrapper/platform.h delete mode 100644 src/c_wrapper/program.cpp delete mode 100644 src/c_wrapper/program.h delete mode 100644 src/c_wrapper/pyhelper.cpp delete mode 100644 src/c_wrapper/pyhelper.h delete mode 100644 src/c_wrapper/pyopencl_ext.h delete mode 100644 src/c_wrapper/sampler.cpp delete mode 100644 src/c_wrapper/sampler.h delete mode 100644 src/c_wrapper/svm.cpp delete mode 100644 src/c_wrapper/svm.h delete mode 100644 src/c_wrapper/utils.h 
delete mode 100644 src/c_wrapper/wrap_cl.cpp delete mode 100644 src/c_wrapper/wrap_cl.h delete mode 100644 src/c_wrapper/wrap_cl_core.h delete mode 100644 src/c_wrapper/wrap_cl_gl_core.h delete mode 100644 src/c_wrapper/wrap_constants.cpp diff --git a/.gitmodules b/.gitmodules index cb5a4e23..b675a6cc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "pyopencl/compyte"] path = pyopencl/compyte url = https://github.com/inducer/compyte -[submodule "src/c_wrapper/mingw-std-threads"] - path = src/c_wrapper/mingw-std-threads - url = https://github.com/meganz/mingw-std-threads.git +[submodule "pybind11"] + path = pybind11 + url = https://github.com/pybind/pybind11.git diff --git a/cffi_build.py.in b/cffi_build.py.in deleted file mode 100644 index f948c824..00000000 --- a/cffi_build.py.in +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import absolute_import, print_function - -__copyright__ = """ -Copyright (C) 2009-15 Andreas Kloeckner -Copyright (C) 2013 Marko Bencun -Copyright (C) 2014 Yuyi Chao -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - - -from cffi import FFI - -ffi = FFI() - - -with open("cl_types.h", "rt") as f: - ffi.cdef(f.read()) - -if {CL_ENABLE_GL}: - with open("cl_gl_types.h") as f: - ffi.cdef(f.read()) - -with open("src/c_wrapper/wrap_cl_core.h", "rt") as f: - ffi.cdef(f.read()) - -if {CL_ENABLE_GL}: - with open("src/c_wrapper/wrap_cl_gl_core.h") as f: - ffi.cdef(f.read()) - -ffi.set_source("pyopencl._cffi", - """ - #include "wrap_cl.h" - """, - define_macros=list({EXTRA_DEFINES}.items()), - include_dirs=( - {CL_INC_DIR} + ["src/c_wrapper/"]), - library_dirs={CL_LIB_DIR}, - libraries={CL_LIBNAME}, - extra_compile_args=({CXXFLAGS}), - extra_link_args={LDFLAGS}, - source_extension=".cpp", - sources=[ - "src/c_wrapper/wrap_cl.cpp", - "src/c_wrapper/wrap_constants.cpp", - "src/c_wrapper/bitlog.cpp", - "src/c_wrapper/pyhelper.cpp", - "src/c_wrapper/platform.cpp", - "src/c_wrapper/device.cpp", - "src/c_wrapper/context.cpp", - "src/c_wrapper/command_queue.cpp", - "src/c_wrapper/event.cpp", - "src/c_wrapper/memory_object.cpp", - "src/c_wrapper/svm.cpp", - "src/c_wrapper/image.cpp", - "src/c_wrapper/gl_obj.cpp", - "src/c_wrapper/memory_map.cpp", - "src/c_wrapper/buffer.cpp", - "src/c_wrapper/sampler.cpp", - "src/c_wrapper/program.cpp", - "src/c_wrapper/kernel.cpp", - "src/c_wrapper/debug.cpp", - ] - ) - - -if __name__ == "__main__": - ffi.compile() diff --git a/cl_gl_types.h b/cl_gl_types.h deleted file mode 100644 index ea0e7e4d..00000000 --- a/cl_gl_types.h +++ /dev/null @@ -1,12 +0,0 @@ -/* cl_gl.h */ -typedef cl_uint cl_gl_object_type; -typedef cl_uint cl_gl_texture_info; -typedef cl_uint cl_gl_platform_info; -typedef struct __GLsync *cl_GLsync; -typedef cl_uint cl_gl_context_info; - -/* cl_egl.h */ -typedef void* 
CLeglImageKHR; -typedef void* CLeglDisplayKHR; -typedef void* CLeglSyncKHR; -typedef intptr_t cl_egl_image_properties_khr; diff --git a/cl_types.h b/cl_types.h deleted file mode 100644 index 5df16013..00000000 --- a/cl_types.h +++ /dev/null @@ -1,128 +0,0 @@ -/* gl.h */ -typedef unsigned int GLenum; -typedef int GLint; /* 4-byte signed */ -typedef unsigned int GLuint; /* 4-byte unsigned */ - - -/* cl.h */ -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short; -typedef uint16_t cl_ushort; -typedef int32_t cl_int; -typedef uint32_t cl_uint; -typedef int64_t cl_long; -typedef uint64_t cl_ulong; - -typedef uint16_t cl_half; -typedef float cl_float; -typedef double cl_double; - - -typedef struct _cl_platform_id * cl_platform_id; -typedef struct _cl_device_id * cl_device_id; -typedef struct _cl_context * cl_context; -typedef struct _cl_command_queue * cl_command_queue; -typedef struct _cl_mem * cl_mem; -typedef struct _cl_program * cl_program; -typedef struct _cl_kernel * cl_kernel; -typedef struct _cl_event * cl_event; -typedef struct _cl_sampler * cl_sampler; - -/* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be -the same size as the bool in kernels. 
*/ -typedef cl_uint cl_bool; -typedef cl_ulong cl_bitfield; -typedef cl_bitfield cl_device_type; -typedef cl_uint cl_platform_info; -typedef cl_uint cl_device_info; -typedef cl_bitfield cl_device_fp_config; -typedef cl_uint cl_device_mem_cache_type; -typedef cl_uint cl_device_local_mem_type; -typedef cl_bitfield cl_device_exec_capabilities; -typedef cl_bitfield cl_device_svm_capabilities; // 2.0 -typedef cl_bitfield cl_command_queue_properties; -typedef intptr_t cl_device_partition_property; -typedef cl_bitfield cl_device_affinity_domain; - -typedef intptr_t cl_context_properties; -typedef cl_uint cl_context_info; -typedef cl_uint cl_command_queue_info; -typedef cl_uint cl_channel_order; -typedef cl_uint cl_channel_type; -typedef cl_bitfield cl_mem_flags; -typedef cl_bitfield cl_svm_mem_flags; // 2.0 -typedef cl_uint cl_mem_object_type; -typedef cl_uint cl_mem_info; -typedef cl_bitfield cl_mem_migration_flags; -typedef cl_uint cl_image_info; -typedef cl_uint cl_buffer_create_type; -typedef cl_uint cl_addressing_mode; -typedef cl_uint cl_filter_mode; -typedef cl_uint cl_sampler_info; -typedef cl_bitfield cl_map_flags; -typedef intptr_t cl_pipe_properties; // 2.0 -typedef cl_uint cl_pipe_info; // 2.0 -typedef cl_uint cl_program_info; -typedef cl_uint cl_program_build_info; -typedef cl_uint cl_program_binary_type; -typedef cl_int cl_build_status; -typedef cl_uint cl_kernel_info; -typedef cl_uint cl_kernel_arg_info; -typedef cl_uint cl_kernel_arg_address_qualifier; -typedef cl_uint cl_kernel_arg_access_qualifier; -typedef cl_bitfield cl_kernel_arg_type_qualifier; -typedef cl_uint cl_kernel_work_group_info; -typedef cl_uint cl_event_info; -typedef cl_uint cl_command_type; -typedef cl_uint cl_profiling_info; -typedef cl_bitfield cl_sampler_properties; // 2.0 -typedef cl_uint cl_kernel_exec_info; // 2.0 - -typedef struct _cl_image_format { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; -} cl_image_format; - -typedef struct 
_cl_image_desc { - cl_mem_object_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t image_row_pitch; - size_t image_slice_pitch; - cl_uint num_mip_levels; - cl_uint num_samples; - cl_mem buffer; -} cl_image_desc; - -typedef struct _cl_buffer_region { - size_t origin; - size_t size; -} cl_buffer_region; - -/* cl_ext.h */ - -typedef union -{ - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; -} cl_device_topology_amd; - -/* -typedef cl_ulong cl_device_partition_property_ext; -typedef cl_uint cl_image_pitch_info_qcom; -typedef struct _cl_mem_ext_host_ptr { - cl_uint allocation_type; - cl_uint host_cache_policy; -} cl_mem_ext_host_ptr; -typedef struct _cl_mem_ion_host_ptr { - cl_mem_ext_host_ptr ext_host_ptr; - int ion_filedesc; - void* ion_hostptr; -} cl_mem_ion_host_ptr; - -typedef cl_bitfield cl_mem_migration_flags_ext; -*/ diff --git a/src/c_wrapper/bitlog.cpp b/src/c_wrapper/bitlog.cpp deleted file mode 100644 index 418eb4d8..00000000 --- a/src/c_wrapper/bitlog.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" - -#include -#include - -/* from http://graphics.stanford.edu/~seander/bithacks.html */ -static const char log_table_8[] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -static PYOPENCL_INLINE unsigned -bitlog2_16(uint16_t v) -{ - if (unsigned long t = v >> 8) { - return 8 + log_table_8[t]; - } else { - return log_table_8[v]; - } -} - -static PYOPENCL_INLINE unsigned -bitlog2_32(uint32_t v) -{ - if (uint16_t t = v >> 16) { - return 16 + bitlog2_16(t); - } else { - return bitlog2_16(v); - } -} - -unsigned -bitlog2(unsigned long v) -{ -#if (ULONG_MAX != 4294967295) - if (uint32_t t = v >> 32) { - return 32 + bitlog2_32(t); - } else { -#endif - return bitlog2_32(v); -#if (ULONG_MAX != 4294967295) - } -#endif -} diff --git a/src/c_wrapper/buffer.cpp b/src/c_wrapper/buffer.cpp deleted file mode 100644 index 70e1ff3e..00000000 --- a/src/c_wrapper/buffer.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include -#include "buffer.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" - -template void print_clobj(std::ostream&, const buffer*); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE buffer* -new_buffer(cl_mem mem) -{ - return pyopencl_convert_obj(buffer, clReleaseMemObject, mem); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -PYOPENCL_USE_RESULT buffer* -buffer::get_sub_region(size_t orig, size_t size, cl_mem_flags flags) const -{ - cl_buffer_region reg = {orig, size}; - - auto mem = retry_mem_error([&] { - return pyopencl_call_guarded(clCreateSubBuffer, PYOPENCL_CL_CASTABLE_THIS, flags, - CL_BUFFER_CREATE_TYPE_REGION, ®); - }); - return new_buffer(mem); -} - -#endif - -// c wrapper - -// Buffer -error* -create_buffer(clobj_t *buffer, clobj_t _ctx, cl_mem_flags flags, - size_t size, void *hostbuf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateBuffer, ctx, - flags, size, hostbuf); - *buffer = new_buffer(mem); - }); -} - -error* -enqueue_read_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buffer, 
size_t size, size_t device_offset, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueReadBuffer, queue, mem, bool(block), device_offset, - size, buffer, wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_write_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const void *buffer, size_t size, size_t device_offset, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueWriteBuffer, queue, mem, bool(block), device_offset, - size, buffer, wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_copy_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, - ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - return c_handle_error([&] { - if (byte_count < 0) { - size_t byte_count_src = 0; - size_t byte_count_dst = 0; - pyopencl_call_guarded( - clGetMemObjectInfo, src, CL_MEM_SIZE, - sizeof(byte_count), &byte_count_src, nullptr); - pyopencl_call_guarded( - clGetMemObjectInfo, src, CL_MEM_SIZE, - sizeof(byte_count), &byte_count_dst, nullptr); - byte_count = std::min(byte_count_src, byte_count_dst); - } - const auto wait_for = buf_from_class(_wait_for, - num_wait_for); - retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueCopyBuffer, queue, src, dst, src_offset, - dst_offset, byte_count, wait_for, event_out(evt)); - }); - }); -} - - -error* -enqueue_fill_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, 
void *pattern, - size_t psize, size_t offset, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - // TODO debug print pattern - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueFillBuffer, queue, mem, pattern, - psize, offset, size, wait_for, - event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueFillBuffer, "CL 1.1 and below") -#endif -} - - -// {{{ rectangular transfers - -error* -enqueue_read_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *buf, - const size_t *_buf_orig, size_t buf_orig_l, - const size_t *_host_orig, size_t host_orig_l, - const size_t *_reg, size_t reg_l, - const size_t *_buf_pitches, size_t buf_pitches_l, - const size_t *_host_pitches, size_t host_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - ConstBuffer buf_orig(_buf_orig, buf_orig_l); - ConstBuffer host_orig(_host_orig, host_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer buf_pitches(_buf_pitches, buf_pitches_l); - ConstBuffer host_pitches(_host_pitches, host_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueReadBufferRect, queue, mem, bool(block), buf_orig, - host_orig, reg, buf_pitches[0], buf_pitches[1], host_pitches[0], - host_pitches[1], buf, wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueReadBufferRect, "CL 1.0") -#endif -} - -error* -enqueue_write_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *buf, - const size_t *_buf_orig, size_t buf_orig_l, - const size_t *_host_orig, size_t host_orig_l, - const size_t *_reg, size_t reg_l, - const size_t 
*_buf_pitches, size_t buf_pitches_l, - const size_t *_host_pitches, size_t host_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - ConstBuffer buf_orig(_buf_orig, buf_orig_l); - ConstBuffer host_orig(_host_orig, host_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer buf_pitches(_buf_pitches, buf_pitches_l); - ConstBuffer host_pitches(_host_pitches, host_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueWriteBufferRect, queue, mem, bool(block), buf_orig, - host_orig, reg, buf_pitches[0], buf_pitches[1], host_pitches[0], - host_pitches[1], buf, wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueWriteBufferRect, "CL 1.0") -#endif -} - -error* -enqueue_copy_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_orig, - size_t src_orig_l, const size_t *_dst_orig, - size_t dst_orig_l, const size_t *_reg, size_t reg_l, - const size_t *_src_pitches, size_t src_pitches_l, - const size_t *_dst_pitches, size_t dst_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - ConstBuffer src_orig(_src_orig, src_orig_l); - ConstBuffer dst_orig(_dst_orig, dst_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer src_pitches(_src_pitches, src_pitches_l); - ConstBuffer dst_pitches(_dst_pitches, dst_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueCopyBufferRect, queue, src, dst, src_orig, dst_orig, - reg, src_pitches[0], src_pitches[1], dst_pitches[0], - dst_pitches[1], wait_for, event_out(evt)); - }); -#else - 
PYOPENCL_UNSUPPORTED(clEnqueueCopyBufferRect, "CL 1.0") -#endif -} - -// }}} - -error* -buffer__get_sub_region(clobj_t *_sub_buf, clobj_t _buf, size_t orig, - size_t size, cl_mem_flags flags) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - auto buf = static_cast(_buf); - return c_handle_error([&] { - *_sub_buf = buf->get_sub_region(orig, size, flags); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateSubBuffer, "CL 1.0") -#endif -} diff --git a/src/c_wrapper/buffer.h b/src/c_wrapper/buffer.h deleted file mode 100644 index c97a7919..00000000 --- a/src/c_wrapper/buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -#include "memory_object.h" -#include "clhelper.h" - -#ifndef __PYOPENCL_BUFFER_H -#define __PYOPENCL_BUFFER_H - -// {{{ buffer - -class buffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(BUFFER); - PYOPENCL_INLINE - buffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} - -#if PYOPENCL_CL_VERSION >= 0x1010 - PYOPENCL_USE_RESULT buffer *get_sub_region(size_t orig, size_t size, - cl_mem_flags flags) const; -#endif -}; - -extern template void print_clobj(std::ostream&, const buffer*); - -// }}} - -#endif diff --git a/src/c_wrapper/clhelper.h b/src/c_wrapper/clhelper.h deleted file mode 100644 index d0aff85c..00000000 --- a/src/c_wrapper/clhelper.h +++ /dev/null @@ -1,254 +0,0 @@ -#include "error.h" -#include "clobj.h" - -#ifndef __PYOPENCL_CLHELPER_H -#define __PYOPENCL_CLHELPER_H - -template -class _CLObjOutArg : public OutArg { - typedef typename CLObj::cl_type CLType; - clobj_t *const m_ret; - CLType m_clobj; - cl_int (CL_API_CALL *m_release)(CLType); - const char *m_name; - std::tuple m_t1; - template - PYOPENCL_INLINE CLObj* - __new_obj(seq) - { - return new CLObj(m_clobj, false, std::get(m_t1)...); - } -public: - PYOPENCL_INLINE - _CLObjOutArg(clobj_t *ret, cl_int (CL_API_CALL *release)(CLType), - const char *name, T... t1) noexcept - : m_ret(ret), m_clobj(nullptr), m_release(release), - m_name(name), m_t1(t1...) 
- { - } - PYOPENCL_INLINE - _CLObjOutArg(_CLObjOutArg &&other) noexcept - : m_ret(other.m_ret), m_clobj(other.m_clobj), - m_release(other.m_release), m_name(other.m_name) - { - std::swap(m_t1, other.m_t1); - } - PYOPENCL_INLINE typename CLObj::cl_type* - get() - { - return &m_clobj; - } - PYOPENCL_INLINE void - convert() - { - *m_ret = __new_obj(typename gens::type()); - } - PYOPENCL_INLINE void - cleanup(bool converted) - { - if (converted) { - delete *m_ret; - *m_ret = nullptr; - } else { - call_guarded_cleanup(m_release, m_name, m_clobj); - } - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) const - { - print_arg(stm, m_clobj, out); - } -}; - -template -static PYOPENCL_INLINE _CLObjOutArg -make_cloutarg(clobj_t *ret, cl_int (CL_API_CALL *release)(typename CLObj::cl_type), - const char *name, T... t1) -{ - return _CLObjOutArg(ret, release, name, t1...); -} -#define pyopencl_outarg(type, ret, func, ...) \ - make_cloutarg(ret, func, #func, ##__VA_ARGS__) - -// {{{ GetInfo helpers - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -get_vec_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - size_t size = 0; - call_guarded(func, name, args..., 0, nullptr, buf_arg(size)); - pyopencl_buf buf(size / sizeof(T)); - call_guarded(func, name, args..., size_arg(buf), buf_arg(size)); - return buf; -} -#define pyopencl_get_vec_info(type, what, ...) 
\ - get_vec_info(clGet##what##Info, "clGet" #what "Info", __VA_ARGS__) - -inline generic_info make_generic_info(class_t opaque_class, const char *type, bool free_type, void *value, bool free_value) -{ - generic_info result; - result.opaque_class = opaque_class; - result.type = type; - result.free_type = free_type; - result.value = value; - result.free_value = free_value; - return result; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_array_info(const char *tname, pyopencl_buf &buf) -{ - return make_generic_info( - CLASS_NONE, - _copy_str(std::string(tname) + "[" + tostring(buf.len()) + "]"), - true, - buf.release(), - true); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_array_info(const char *tname, pyopencl_buf &&_buf) -{ - pyopencl_buf &buf = _buf; - return convert_array_info(tname, buf); -} - -#define pyopencl_convert_array_info(type, buf) \ - convert_array_info(#type, buf) -#define pyopencl_get_array_info(type, what, ...) \ - pyopencl_convert_array_info(type, pyopencl_get_vec_info(type, what, __VA_ARGS__)) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_opaque_array_info(T &&buf) -{ - return make_generic_info( - CLObj::class_id, - _copy_str(std::string("void*[") + tostring(buf.len()) + "]"), - true, - buf_to_base(std::forward(buf)).release(), - true); -} -#define pyopencl_get_opaque_array_info(cls, what, ...) \ - convert_opaque_array_info( \ - pyopencl_get_vec_info(cls::cl_type, what, __VA_ARGS__)) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_opaque_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... 
args) -{ - typename CLObj::cl_type param_value; - call_guarded(func, name, args..., size_arg(param_value), nullptr); - void *value; - if (param_value) { - value = (void*)(new CLObj(param_value, /*retain*/ true)); - } else { - value = nullptr; - } - return make_generic_info(CLObj::class_id, "void *", false, value, true); -} -#define pyopencl_get_opaque_info(clobj, what, ...) \ - get_opaque_info(clGet##what##Info, \ - "clGet" #what "Info", __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_str_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - size_t size; - call_guarded(func, name, args..., 0, nullptr, buf_arg(size)); - pyopencl_buf param_value(size); - call_guarded(func, name, args..., param_value, buf_arg(size)); - return make_generic_info(CLASS_NONE, "char*", false, (void*)param_value.release(), true); -} -#define pyopencl_get_str_info(what, ...) \ - get_str_info(clGet##what##Info, "clGet" #what "Info", __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_int_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - const char *tpname, ArgTypes2&&... args) -{ - T value; - call_guarded(func, name, args..., size_arg(value), nullptr); - return make_generic_info(CLASS_NONE, tpname, false, cl_memdup(&value), true); -} -#define pyopencl_get_int_info(type, what, ...) \ - get_int_info(clGet##what##Info, "clGet" #what "Info", \ - #type "*", __VA_ARGS__) - -// }}} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T* -convert_obj(cl_int (CL_API_CALL *clRelease)(CLType), const char *name, CLType cl_obj, - ArgTypes&&... args) -{ - try { - return new T(cl_obj, false, std::forward(args)...); - } catch (...) { - call_guarded_cleanup(clRelease, name, cl_obj); - throw; - } -} -#define pyopencl_convert_obj(type, func, ...) 
\ - convert_obj(func, #func, __VA_ARGS__) - -// {{{ extension function pointers - -#if PYOPENCL_CL_VERSION >= 0x1020 -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -get_ext_fun(cl_platform_id plat, const char *name, const char *err) -{ - T func = (T)clGetExtensionFunctionAddressForPlatform(plat, name); - if (!func) { - throw clerror(name, CL_INVALID_VALUE, err); - } - return func; -} -#define pyopencl_get_ext_fun(plat, name) \ - get_ext_fun(plat, #name, #name " not available") -#else -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -get_ext_fun(const char *name, const char *err) -{ - T func = (T)clGetExtensionFunctionAddress(name); - if (!func) { - throw clerror(name, CL_INVALID_VALUE, err); - } - return func; -} -#define pyopencl_get_ext_fun(plat, name) \ - get_ext_fun(#name, #name " not available") -#endif - -// }}} - -static PYOPENCL_INLINE std::ostream& -operator<<(std::ostream &stm, const cl_image_format &fmt) -{ - stm << "channel_order: " << fmt.image_channel_order - << ",\nchannel_data_type: " << fmt.image_channel_data_type; - return stm; -} - -#ifdef CL_DEVICE_TOPOLOGY_AMD -static PYOPENCL_INLINE std::ostream& -operator<<(std::ostream &stm, const cl_device_topology_amd &topol) -{ - stm << "pcie.bus: " << topol.pcie.bus - << ",\npcie.device: " << topol.pcie.device - << ",\npcie.function: " << topol.pcie.function - << ",\npcie.type: " << topol.pcie.type; - return stm; -} -#endif -#endif diff --git a/src/c_wrapper/clinfo_ext.h b/src/c_wrapper/clinfo_ext.h deleted file mode 100644 index 43b7b608..00000000 --- a/src/c_wrapper/clinfo_ext.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Include OpenCL header, and define OpenCL extensions, since what is and is not - * available in the official headers is very system-dependent */ - -#ifndef _EXT_H -#define _EXT_H - -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) -#include -#else -#include -#endif - -/* These two defines were introduced in the 1.2 headers - * on 2012-11-30, so earlier versions 
don't have them - * (e.g. Debian wheezy) - */ - -#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT -#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A -#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B -#endif - -/* - * Extensions - */ - -/* cl_khr_icd */ -#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 -#define CL_PLATFORM_NOT_FOUND_KHR -1001 - - -/* cl_khr_fp64 */ -#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 - -/* cl_khr_fp16 */ -#define CL_DEVICE_HALF_FP_CONFIG 0x1033 - -/* cl_khr_terminate_context */ -#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F - -/* cl_nv_device_attribute_query */ -#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 -#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 -#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 -#define CL_DEVICE_WARP_SIZE_NV 0x4003 -#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 -#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 -#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 -#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 -#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 -#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 - -/* cl_ext_atomic_counters_{32,64} */ -#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 - -/* cl_amd_device_attribute_query */ -#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 -#define CL_DEVICE_TOPOLOGY_AMD 0x4037 -#define CL_DEVICE_BOARD_NAME_AMD 0x4038 -#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 -#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 -#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 -#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 -#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 -#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 -#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 -#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 -#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 -#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A -#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B -#define 
CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C - -#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD -#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 - -typedef union -{ - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; -} cl_device_topology_amd; -#endif - -/* cl_amd_offline_devices */ -#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F - -/* cl_ext_device_fission */ -#define cl_ext_device_fission 1 - -typedef cl_ulong cl_device_partition_property_ext; - -#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 -#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 -#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 -#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */ -#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 - -#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 -#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 -#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 -#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 -#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 - -#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 -#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 -#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 -#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 -#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 -#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 - -/* cl_intel_advanced_motion_estimation */ -#define CL_DEVICE_ME_VERSION_INTEL 0x407E - -/* cl_qcom_ext_host_ptr */ -#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 -#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 - -/* cl_khr_spir */ -#define CL_DEVICE_SPIR_VERSIONS 0x40E0 - -/* cl_altera_device_temperature */ -#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3 - -/* cl_intel_simultaneous_sharing */ -#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 -#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 - -#endif diff --git a/src/c_wrapper/clobj.h b/src/c_wrapper/clobj.h deleted file mode 100644 index 5db08710..00000000 --- 
a/src/c_wrapper/clobj.h +++ /dev/null @@ -1,149 +0,0 @@ -#include "utils.h" - -#ifndef __PYOPENCL_CLOBJ_H -#define __PYOPENCL_CLOBJ_H - -#define PYOPENCL_DEF_CL_CLASS(name) \ - constexpr static class_t class_id = CLASS_##name; \ - constexpr static const char *class_name = #name; - -struct clbase { -private: - // non-copyable - clbase(const clbase&) = delete; - clbase &operator=(const clbase&) = delete; - bool operator==(clbase const &other) const = delete; - bool operator!=(clbase const &other) const = delete; -public: - clbase() = default; - virtual ~clbase() = default; - virtual intptr_t intptr() const = 0; - virtual generic_info get_info(cl_uint) const = 0; -}; - -template -class clobj : public clbase { -private: - CLType m_obj; -public: - typedef CLType cl_type; - PYOPENCL_INLINE - clobj(CLType obj, bool=false) : m_obj(obj) - {} - PYOPENCL_INLINE const CLType& - data() const - { - return m_obj; - } - intptr_t - intptr() const - { - return (intptr_t)m_obj; - } -}; - -template -void -print_clobj(std::ostream &stm, const CLObj *obj) -{ - stm << CLObj::class_name << "(" << (const void*)obj << ")<" - << (const void*)obj->data() << ">"; -} - -template -class CLArg, - CLObj>::value> > { -private: - CLObj &m_obj; -public: - CLArg(CLObj &obj) : m_obj(obj) - { - } - PYOPENCL_INLINE const typename CLObj::cl_type& - convert() const - { - return m_obj.data(); - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - print_clobj(stm, &m_obj); - } -}; - -template -class CLArg, - CLObj>::value> > { -private: - CLObj *m_obj; -public: - CLArg(CLObj *obj) : m_obj(obj) - { - } - PYOPENCL_INLINE const typename CLObj::cl_type& - convert() const - { - return m_obj->data(); - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - print_clobj(stm, m_obj); - } -}; - -template -static PYOPENCL_INLINE CLObj* -clobj_from_int_ptr(intptr_t ptr, bool retain) -{ - return new CLObj(reinterpret_cast(ptr), retain); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf 
-buf_from_class(T2 *buf2, size_t len) -{ - pyopencl_buf buf(len); - for (size_t i = 0;i < len;i++) { - buf[i] = static_cast(buf2[i])->data(); - } - return buf; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_from_class(T2 &&buf) -{ - return buf_from_class(buf.get(), buf.len()); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_to_base(T2 *buf2, size_t len, ArgTypes&&... args) -{ - pyopencl_buf buf(len); - size_t i = 0; - try { - for (;i < len;i++) { - buf[i] = static_cast( - new T((typename T::cl_type)buf2[i], - std::forward(args)...)); - } - } catch (...) { - for (size_t j = 0;j < i;j++) { - delete buf[i]; - } - throw; - } - return buf; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_to_base(T2 &&buf2, ArgTypes&&... args) -{ - return buf_to_base(buf2.get(), buf2.len(), - std::forward(args)...); -} - -#endif diff --git a/src/c_wrapper/command_queue.cpp b/src/c_wrapper/command_queue.cpp deleted file mode 100644 index b8ecef1e..00000000 --- a/src/c_wrapper/command_queue.cpp +++ /dev/null @@ -1,132 +0,0 @@ -#include "command_queue.h" -#include "device.h" -#include "context.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, - const cl_command_queue&, bool); -template void print_clobj(std::ostream&, const command_queue*); -template void print_buf( - std::ostream&, const cl_command_queue*, size_t, ArgType, bool, bool); - -command_queue::~command_queue() -{ - pyopencl_call_guarded_cleanup(clReleaseCommandQueue, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -command_queue::get_info(cl_uint param_name) const -{ - switch ((cl_command_queue_info)param_name) { - case CL_QUEUE_CONTEXT: - return pyopencl_get_opaque_info(context, CommandQueue, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_DEVICE: - return pyopencl_get_opaque_info(device, CommandQueue, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_REFERENCE_COUNT: - 
return pyopencl_get_int_info(cl_uint, CommandQueue, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_PROPERTIES: - return pyopencl_get_int_info(cl_command_queue_properties, - CommandQueue, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("CommandQueue.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Command Queue -error* -create_command_queue(clobj_t *queue, clobj_t _ctx, - clobj_t _dev, cl_command_queue_properties props) -{ - auto ctx = static_cast(_ctx); - auto py_dev = static_cast(_dev); - return c_handle_error([&] { - cl_device_id dev; - if (py_dev) { - dev = py_dev->data(); - } else { - auto devs = pyopencl_get_vec_info(cl_device_id, Context, - ctx, CL_CONTEXT_DEVICES); - if (devs.len() == 0) { - throw clerror("CommandQueue", CL_INVALID_VALUE, - "context doesn't have any devices? -- " - "don't know which one to default to"); - } - dev = devs[0]; - } - cl_command_queue cl_queue = - pyopencl_call_guarded(clCreateCommandQueue, ctx, dev, props); - *queue = new command_queue(cl_queue, false); - }); -} - -error* -command_queue__finish(clobj_t queue) -{ - return c_handle_error([&] { - pyopencl_call_guarded(clFinish, static_cast(queue)); - }); -} - -error* -command_queue__flush(clobj_t queue) -{ - return c_handle_error([&] { - pyopencl_call_guarded(clFlush, static_cast(queue)); - }); -} - -error* -enqueue_marker_with_wait_list(clobj_t *evt, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto queue = static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueMarkerWithWaitList, queue, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueMarkerWithWaitList, "CL 1.2") -#endif -} - -error* -enqueue_barrier_with_wait_list(clobj_t *evt, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto queue = 
static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueBarrierWithWaitList, queue, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueBarrierWithWaitList, "CL 1.2") -#endif -} - -error* -enqueue_marker(clobj_t *evt, clobj_t _queue) -{ - auto queue = static_cast(_queue); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueMarker, queue, event_out(evt)); - }); -} - -error* -enqueue_barrier(clobj_t _queue) -{ - auto queue = static_cast(_queue); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueBarrier, queue); - }); -} diff --git a/src/c_wrapper/command_queue.h b/src/c_wrapper/command_queue.h deleted file mode 100644 index 3a7c0171..00000000 --- a/src/c_wrapper/command_queue.h +++ /dev/null @@ -1,64 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_COMMAND_QUEUE_H -#define __PYOPENCL_COMMAND_QUEUE_H - -// {{{ command_queue - -extern template class clobj; -extern template void print_arg( - std::ostream&, const cl_command_queue&, bool); -extern template void print_buf( - std::ostream&, const cl_command_queue*, size_t, ArgType, bool, bool); - -class command_queue : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(COMMAND_QUEUE); - PYOPENCL_INLINE - command_queue(cl_command_queue q, bool retain) - : clobj(q) - { - if (retain) { - pyopencl_call_guarded(clRetainCommandQueue, PYOPENCL_CL_CASTABLE_THIS); - } - } - PYOPENCL_INLINE - command_queue(const command_queue &queue) - : command_queue(queue.data(), true) - {} - ~command_queue(); - - generic_info get_info(cl_uint param_name) const; - -#if 0 - - PYOPENCL_USE_RESULT std::unique_ptr - get_context() const - { - cl_context param_value; - pyopencl_call_guarded(clGetCommandQueueInfo, this, CL_QUEUE_CONTEXT, - size_arg(param_value), nullptr); - return std::unique_ptr( - new context(param_value, /*retain*/ true)); - } - -#if PYOPENCL_CL_VERSION < 0x1010 - cl_command_queue_properties - 
set_property(cl_command_queue_properties prop, bool enable) const - { - cl_command_queue_properties old_prop; - pyopencl_call_guarded(clSetCommandQueueProperty, this, prop, - enable, buf_arg(old_prop)); - return old_prop; - } -#endif - -#endif -}; - -extern template void print_clobj(std::ostream&, - const command_queue*); - -// }}} - -#endif diff --git a/src/c_wrapper/context.cpp b/src/c_wrapper/context.cpp deleted file mode 100644 index 0fe48554..00000000 --- a/src/c_wrapper/context.cpp +++ /dev/null @@ -1,153 +0,0 @@ -#include "context.h" -#include "device.h" -#include "platform.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_context&, bool); -template void print_clobj(std::ostream&, const context*); -template void print_buf(std::ostream&, const cl_context*, - size_t, ArgType, bool, bool); - -void -context::get_version(cl_context ctx, int *major, int *minor) -{ - cl_device_id s_buff[16]; - size_t size; - pyopencl_buf d_buff(0); - cl_device_id *devs = s_buff; - pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, - 0, nullptr, buf_arg(size)); - if (PYOPENCL_UNLIKELY(!size)) { - throw clerror("Context.get_version", CL_INVALID_VALUE, - "Cannot get devices from context."); - } - if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { - d_buff.resize(size / sizeof(cl_device_id)); - devs = d_buff.get(); - } - pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, - size_arg(devs, size), buf_arg(size)); - device::get_version(devs[0], major, minor); -} - -context::~context() -{ - pyopencl_call_guarded_cleanup(clReleaseContext, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -context::get_info(cl_uint param_name) const -{ - switch ((cl_context_info)param_name) { - case CL_CONTEXT_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Context, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_CONTEXT_DEVICES: - return pyopencl_get_opaque_array_info(device, Context, - PYOPENCL_CL_CASTABLE_THIS, 
param_name); - case CL_CONTEXT_PROPERTIES: { - auto result = pyopencl_get_vec_info( - cl_context_properties, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); - pyopencl_buf py_result(result.len() / 2); - size_t i = 0; - for (;i < py_result.len();i++) { - cl_context_properties key = result[i * 2]; - if (key == 0) - break; - cl_context_properties value = result[i * 2 + 1]; - switch (key) { - case CL_CONTEXT_PLATFORM: - py_result[i] = make_generic_info( - CLASS_PLATFORM, - "void *", false, - new platform(reinterpret_cast(value)), true); - break; - -#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) -#if defined(__APPLE__) && defined(HAVE_GL) && !defined(PYOPENCL_APPLE_USE_CL_H) - case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: -#else - case CL_GL_CONTEXT_KHR: - case CL_EGL_DISPLAY_KHR: - case CL_GLX_DISPLAY_KHR: - case CL_WGL_HDC_KHR: - case CL_CGL_SHAREGROUP_KHR: -#endif - py_result[i] = make_generic_info( - CLASS_NONE, - "intptr_t *", false, - (void*)value, - // we do not own this object - false); - break; -#endif - default: - throw clerror("Context.get_info", CL_INVALID_VALUE, - "unknown context_property key encountered"); - } - } - py_result.resize(i); - return pyopencl_convert_array_info(generic_info, py_result); - } - -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_CONTEXT_NUM_DEVICES: - return pyopencl_get_int_info(cl_uint, Context, - PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("Context.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Context -error* -create_context(clobj_t *_ctx, const cl_context_properties *props, - cl_uint num_devices, const clobj_t *_devices) -{ - // TODO debug print properties - return c_handle_error([&] { - const auto devices = buf_from_class(_devices, num_devices); - *_ctx = new context( - pyopencl_call_guarded( - clCreateContext, - const_cast(props), - devices, nullptr, nullptr), false); - }); -} - -// Context -error* -create_context_from_type(clobj_t *_ctx, const 
cl_context_properties *props, - cl_device_type dev_type) -{ - // TODO debug print properties - return c_handle_error([&] { - *_ctx = new context( - pyopencl_call_guarded( - clCreateContextFromType, - const_cast(props), - dev_type, nullptr, nullptr), false); - }); -} - -error* -context__get_supported_image_formats(clobj_t _ctx, cl_mem_flags flags, - cl_mem_object_type image_type, - generic_info *out) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_uint num; - pyopencl_call_guarded(clGetSupportedImageFormats, ctx, flags, - image_type, 0, nullptr, buf_arg(num)); - pyopencl_buf formats(num); - pyopencl_call_guarded(clGetSupportedImageFormats, ctx, flags, - image_type, formats, buf_arg(num)); - *out = pyopencl_convert_array_info(cl_image_format, formats); - }); -} diff --git a/src/c_wrapper/context.h b/src/c_wrapper/context.h deleted file mode 100644 index 1691035d..00000000 --- a/src/c_wrapper/context.h +++ /dev/null @@ -1,34 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_CONTEXT_H -#define __PYOPENCL_CONTEXT_H - -// {{{ context - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_context&, bool); -extern template void print_buf(std::ostream&, const cl_context*, - size_t, ArgType, bool, bool); - -class context : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(CONTEXT); - static void get_version(cl_context ctx, int *major, int *minor); - PYOPENCL_INLINE - context(cl_context ctx, bool retain) - : clobj(ctx) - { - if (retain) { - pyopencl_call_guarded(clRetainContext, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~context(); - generic_info get_info(cl_uint param_name) const; -}; - -extern template void print_clobj(std::ostream&, const context*); - -// }}} - -#endif diff --git a/src/c_wrapper/debug.cpp b/src/c_wrapper/debug.cpp deleted file mode 100644 index a118b468..00000000 --- a/src/c_wrapper/debug.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "debug.h" -#include -#include -#include -#include - -std::mutex dbg_lock; 
- -void -dbg_print_str(std::ostream &stm, const char *str, size_t len) -{ - stm << '"'; - for (size_t i = 0;i < len;i++) { - char escaped = 0; -#define escape_char(in, out) \ - case in: \ - escaped = out; \ - break - switch (str[i]) { - escape_char('\'', '\''); - escape_char('\"', '\"'); - escape_char('\?', '\?'); - escape_char('\\', '\\'); - escape_char('\0', '0'); - escape_char('\a', 'a'); - escape_char('\b', 'b'); - escape_char('\f', 'f'); - escape_char('\r', 'r'); - escape_char('\v', 'v'); - default: - break; - } - if (escaped) { - stm << '\\' << escaped; - } else { - stm << str[i]; - } - } - stm << '"'; -} - -void -dbg_print_bytes(std::ostream &stm, const unsigned char *bytes, size_t len) -{ - stm << '"'; - for (size_t i = 0;i < len;i++) { - stm << "\\x" << std::hex << std::setfill('0') - << std::setw(2) << bytes[i]; - } - stm << std::dec << '"'; -} - -static PYOPENCL_INLINE bool -_get_debug_env() -{ - const char *env = getenv("PYOPENCL_DEBUG"); - const bool default_debug = DEFAULT_DEBUG; - if (!env) { - return default_debug; - } - if (strcasecmp(env, "0") == 0 || strcasecmp(env, "f") == 0 || - strcasecmp(env, "false") == 0 || strcasecmp(env, "off") == 0) { - return false; - } - if (strcasecmp(env, "1") == 0 || strcasecmp(env, "t") == 0 || - strcasecmp(env, "true") == 0 || strcasecmp(env, "on") == 0) { - return true; - } - return default_debug; -} - -bool debug_enabled = _get_debug_env(); - -int -get_debug() -{ - return (int) debug_enabled; -} - -void -set_debug(int debug) -{ - debug_enabled = (bool)debug; -} diff --git a/src/c_wrapper/debug.h b/src/c_wrapper/debug.h deleted file mode 100644 index f0700030..00000000 --- a/src/c_wrapper/debug.h +++ /dev/null @@ -1,33 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" -#include -#include - -#ifdef __MINGW32__ -#include "mingw-std-threads/mingw.mutex.h" -#include "mingw-std-threads/mingw.thread.h" -#endif - -#ifndef __PYOPENCL_DEBUG_H -#define __PYOPENCL_DEBUG_H - -extern bool debug_enabled; -#ifdef 
PYOPENCL_TRACE -#define DEFAULT_DEBUG true -#else -#define DEFAULT_DEBUG false -#endif - -#define DEBUG_ON (PYOPENCL_EXPECT(debug_enabled, DEFAULT_DEBUG)) - -extern std::mutex dbg_lock; - -void dbg_print_str(std::ostream&, const char*, size_t); -static PYOPENCL_INLINE void -dbg_print_str(std::ostream &stm, const char *str) -{ - return dbg_print_str(stm, str, strlen(str)); -} -void dbg_print_bytes(std::ostream &stm, const unsigned char *bytes, size_t len); - -#endif diff --git a/src/c_wrapper/device.cpp b/src/c_wrapper/device.cpp deleted file mode 100644 index 16edaf34..00000000 --- a/src/c_wrapper/device.cpp +++ /dev/null @@ -1,375 +0,0 @@ -#include "device.h" -#include "platform.h" - -template class clobj; -template void print_arg(std::ostream&, - const cl_device_id&, bool); -template void print_clobj(std::ostream&, const device*); -template void print_buf(std::ostream&, const cl_device_id*, - size_t, ArgType, bool, bool); - -void -device::get_version(cl_device_id dev, int *major, int *minor) -{ - cl_platform_id plat; - pyopencl_call_guarded(clGetDeviceInfo, dev, CL_DEVICE_PLATFORM, - size_arg(plat), nullptr); - platform::get_version(plat, major, minor); -} - -device::~device() -{ - if (false) { - } -#if PYOPENCL_CL_VERSION >= 0x1020 - else if (m_ref_type == REF_CL_1_2) { - pyopencl_call_guarded_cleanup(clReleaseDevice, PYOPENCL_CL_CASTABLE_THIS); - } -#endif -} - -#ifdef CL_DEVICE_TOPOLOGY_AMD -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_device_topology_amd(ArgTypes&&... args) -{ - const char * tpname = "cl_device_topology_amd*"; - cl_device_topology_amd value; - const char * fname = "clGetDeviceInfo"; - call_guarded(clGetDeviceInfo, fname, args..., size_arg(value), nullptr); - return make_generic_info(CLASS_NONE, tpname, false, cl_memdup(&value), true); -} - -#define pyopencl_get_device_topology_amd(...) 
get_device_topology_amd(__VA_ARGS__) - -#endif - -generic_info -device::get_info(cl_uint param_name) const -{ -#define DEV_GET_INT_INF(TYPE) \ - pyopencl_get_int_info(TYPE, Device, PYOPENCL_CL_CASTABLE_THIS, param_name) - - switch ((cl_device_info)param_name) { - case CL_DEVICE_TYPE: - return DEV_GET_INT_INF(cl_device_type); - case CL_DEVICE_MAX_WORK_GROUP_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: - case CL_DEVICE_MAX_COMPUTE_UNITS: - case CL_DEVICE_VENDOR_ID: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_MAX_WORK_ITEM_SIZES: - return pyopencl_get_array_info(size_t, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - - case CL_DEVICE_MAX_CLOCK_FREQUENCY: - case CL_DEVICE_ADDRESS_BITS: - case CL_DEVICE_MAX_READ_IMAGE_ARGS: - case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: - case CL_DEVICE_MAX_SAMPLERS: - case CL_DEVICE_MEM_BASE_ADDR_ALIGN: - case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_MAX_MEM_ALLOC_SIZE: - return DEV_GET_INT_INF(cl_ulong); - - case CL_DEVICE_IMAGE2D_MAX_WIDTH: - case CL_DEVICE_IMAGE2D_MAX_HEIGHT: - case CL_DEVICE_IMAGE3D_MAX_WIDTH: - case CL_DEVICE_IMAGE3D_MAX_HEIGHT: - case CL_DEVICE_IMAGE3D_MAX_DEPTH: - case CL_DEVICE_MAX_PARAMETER_SIZE: - return DEV_GET_INT_INF(size_t); - - case CL_DEVICE_IMAGE_SUPPORT: - return DEV_GET_INT_INF(cl_bool); -#ifdef CL_DEVICE_DOUBLE_FP_CONFIG - case CL_DEVICE_DOUBLE_FP_CONFIG: -#endif -#ifdef CL_DEVICE_HALF_FP_CONFIG - case CL_DEVICE_HALF_FP_CONFIG: -#endif - case CL_DEVICE_SINGLE_FP_CONFIG: - return DEV_GET_INT_INF(cl_device_fp_config); - - case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: - return DEV_GET_INT_INF(cl_device_mem_cache_type); - case 
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: - case CL_DEVICE_GLOBAL_MEM_SIZE: - case CL_DEVICE_LOCAL_MEM_SIZE: - case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: - return DEV_GET_INT_INF(cl_ulong); - - case CL_DEVICE_MAX_CONSTANT_ARGS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_LOCAL_MEM_TYPE: - return DEV_GET_INT_INF(cl_device_local_mem_type); - case CL_DEVICE_PROFILING_TIMER_RESOLUTION: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_ENDIAN_LITTLE: - case CL_DEVICE_AVAILABLE: - case CL_DEVICE_COMPILER_AVAILABLE: - case CL_DEVICE_ERROR_CORRECTION_SUPPORT: - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_EXECUTION_CAPABILITIES: - return DEV_GET_INT_INF(cl_device_exec_capabilities); - case CL_DEVICE_QUEUE_PROPERTIES: - // same as CL_DEVICE_QUEUE_ON_HOST_PROPERTIES in 2.0 - return DEV_GET_INT_INF(cl_command_queue_properties); - - case CL_DEVICE_NAME: - case CL_DEVICE_VENDOR: - case CL_DRIVER_VERSION: - case CL_DEVICE_PROFILE: - case CL_DEVICE_VERSION: - case CL_DEVICE_EXTENSIONS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - - case CL_DEVICE_PLATFORM: - return pyopencl_get_opaque_info(platform, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_HOST_UNIFIED_MEMORY: // deprecated in 2.0 - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_OPENCL_C_VERSION: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: - 
case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: - case CL_DEVICE_REGISTERS_PER_BLOCK_NV: - case CL_DEVICE_WARP_SIZE_NV: -#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV - case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV: -#endif -#ifdef CL_DEVICE_PCI_BUS_ID_NV - case CL_DEVICE_PCI_BUS_ID_NV: -#endif -#ifdef CL_DEVICE_PCI_SLOT_ID_NV - case CL_DEVICE_PCI_SLOT_ID_NV: -#endif - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_GPU_OVERLAP_NV: - case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: - case CL_DEVICE_INTEGRATED_MEMORY_NV: - return DEV_GET_INT_INF(cl_bool); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_DEVICE_LINKER_AVAILABLE: - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_BUILT_IN_KERNELS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: - case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_PARENT_DEVICE: - return pyopencl_get_opaque_info(device, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PARTITION_TYPE: - case CL_DEVICE_PARTITION_PROPERTIES: - return pyopencl_get_array_info(cl_device_partition_property, - Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: - return pyopencl_get_array_info(cl_device_affinity_domain, - Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_REFERENCE_COUNT: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: - case CL_DEVICE_PRINTF_BUFFER_SIZE: - return DEV_GET_INT_INF(cl_bool); -#endif -#ifdef cl_khr_image2d_from_buffer - case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: - return 
DEV_GET_INT_INF(size_t); - case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: - return DEV_GET_INT_INF(cl_command_queue_properties); - case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_ON_DEVICE_QUEUES: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_ON_DEVICE_EVENTS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_SVM_CAPABILITIES: - return DEV_GET_INT_INF(cl_device_svm_capabilities); - case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_MAX_PIPE_ARGS: - case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: - case CL_DEVICE_PIPE_MAX_PACKET_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: - case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: - case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); -#endif -#if PYOPENCL_CL_VERSION >= 0x2010 - case CL_DEVICE_IL_VERSION: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_MAX_NUM_SUB_GROUPS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: - return DEV_GET_INT_INF(cl_bool); -#endif - - - // {{{ AMD dev attrs - // - // types of AMD dev attrs divined from - // https://www.khronos.org/registry/cl/api/1.2/cl.hpp -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD - case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: - return DEV_GET_INT_INF(cl_ulong); -#endif -#ifdef CL_DEVICE_TOPOLOGY_AMD - case CL_DEVICE_TOPOLOGY_AMD: - return pyopencl_get_device_topology_amd(PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD - case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: - return DEV_GET_INT_INF(cl_bool); -#endif -#ifdef CL_DEVICE_BOARD_NAME_AMD - case CL_DEVICE_BOARD_NAME_AMD: ; - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef 
CL_DEVICE_GLOBAL_FREE_MEMORY_AMD - case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: - return pyopencl_get_array_info(size_t, Device, - PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD - case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD - case CL_DEVICE_SIMD_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD - case CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - case CL_DEVICE_WAVEFRONT_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD - case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD - case CL_DEVICE_LOCAL_MEM_BANKS_AMD: -#endif -#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT - case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: -#endif -#ifdef CL_DEVICE_GFXIP_MAJOR_AMD - case CL_DEVICE_GFXIP_MAJOR_AMD: -#endif -#ifdef CL_DEVICE_GFXIP_MINOR_AMD - case CL_DEVICE_GFXIP_MINOR_AMD: -#endif -#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD - case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: -#endif - return DEV_GET_INT_INF(cl_uint); - // }}} -#ifdef CL_DEVICE_ME_VERSION_INTEL - case CL_DEVICE_ME_VERSION_INTEL: -#endif -#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM - case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: -#endif -#ifdef CL_DEVICE_PAGE_SIZE_QCOM - case CL_DEVICE_PAGE_SIZE_QCOM: -#endif -#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL - case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: -#endif - return DEV_GET_INT_INF(cl_uint); -#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL - case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: - return pyopencl_get_array_info(cl_uint, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); 
-#endif -#ifdef CL_DEVICE_SPIR_VERSIONS - case CL_DEVICE_SPIR_VERSIONS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA - case CL_DEVICE_CORE_TEMPERATURE_ALTERA: - return DEV_GET_INT_INF(cl_int); -#endif - - default: - throw clerror("Device.get_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -PYOPENCL_USE_RESULT pyopencl_buf -device::create_sub_devices(const cl_device_partition_property *props) -{ - // TODO debug print props - cl_uint num_devices; - pyopencl_call_guarded(clCreateSubDevices, PYOPENCL_CL_CASTABLE_THIS, props, 0, nullptr, - buf_arg(num_devices)); - pyopencl_buf devices(num_devices); - pyopencl_call_guarded(clCreateSubDevices, PYOPENCL_CL_CASTABLE_THIS, props, devices, - buf_arg(num_devices)); - return buf_to_base(devices, true, device::REF_CL_1_2); -} -#endif - -// c wrapper - -error* -device__create_sub_devices(clobj_t _dev, clobj_t **_devs, - uint32_t *num_devices, - const cl_device_partition_property *props) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto dev = static_cast(_dev); - return c_handle_error([&] { - auto devs = dev->create_sub_devices(props); - *num_devices = (uint32_t)devs.len(); - *_devs = devs.release(); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clCreateSubDevices, "CL 1.2") -#endif -} diff --git a/src/c_wrapper/device.h b/src/c_wrapper/device.h deleted file mode 100644 index a14a9468..00000000 --- a/src/c_wrapper/device.h +++ /dev/null @@ -1,61 +0,0 @@ -#include "clhelper.h" - -#ifndef __PYOPENCL_DEVICE_H -#define __PYOPENCL_DEVICE_H - -// {{{ device - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_device_id&, bool); -extern template void print_buf(std::ostream&, const cl_device_id*, - size_t, ArgType, bool, bool); - -class device : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(DEVICE); - enum reference_type_t { - REF_NOT_OWNABLE, - REF_CL_1_2, - }; - -private: - reference_type_t m_ref_type; 
- -public: - static void get_version(cl_device_id dev, int *major, int *minor); - device(cl_device_id did, bool retain=false, - reference_type_t ref_type=REF_NOT_OWNABLE) - : clobj(did), m_ref_type(ref_type) - { - if (retain && ref_type != REF_NOT_OWNABLE) { - if (false) { - } -#if PYOPENCL_CL_VERSION >= 0x1020 - else if (ref_type == REF_CL_1_2) { - pyopencl_call_guarded(clRetainDevice, PYOPENCL_CL_CASTABLE_THIS); - } -#endif - - else { - throw clerror("Device", CL_INVALID_VALUE, - "cannot own references to devices when device " - "fission or CL 1.2 is not available"); - } - } - } - - ~device(); - - generic_info get_info(cl_uint param_name) const; -#if PYOPENCL_CL_VERSION >= 0x1020 - PYOPENCL_USE_RESULT pyopencl_buf - create_sub_devices(const cl_device_partition_property *props); -#endif -}; - -extern template void print_clobj(std::ostream&, const device*); - -// }}} - -#endif diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h deleted file mode 100644 index 30e985f9..00000000 --- a/src/c_wrapper/error.h +++ /dev/null @@ -1,337 +0,0 @@ -#include "wrap_cl.h" -#include "pyhelper.h" -#include "clobj.h" - -#include -#include -#include -#include -#include -#include - -#ifndef __PYOPENCL_ERROR_H -#define __PYOPENCL_ERROR_H - -// {{{ error - -// See https://github.com/inducer/pyopencl/pull/83 -#if GCC_VERSION > 50200 -#define PYOPENCL_CL_CASTABLE_THIS this -#else -#define PYOPENCL_CL_CASTABLE_THIS data() -#endif - -// discouraged, assumes 'version linearity', use PYOPENCL_UNSUPPORTED_BEFORE -#define PYOPENCL_UNSUPPORTED(ROUTINE, VERSION) \ - auto err = (error*)malloc(sizeof(error)); \ - err->routine = strdup(#ROUTINE); \ - err->msg = strdup("unsupported in " VERSION); \ - err->code = CL_INVALID_VALUE; \ - err->other = 0; \ - return err; - -#define PYOPENCL_UNSUPPORTED_BEFORE(ROUTINE, VERSION) \ - auto err = (error*)malloc(sizeof(error)); \ - err->routine = strdup(#ROUTINE); \ - err->msg = strdup("unsupported before " VERSION); \ - err->code = CL_INVALID_VALUE; \ - 
err->other = 0; \ - return err; - -class clerror : public std::runtime_error { -private: - const char *m_routine; - cl_int m_code; - -public: - clerror(const char *rout, cl_int c, const char *msg="") - : std::runtime_error(msg), m_routine(rout), m_code(c) - { - if (DEBUG_ON) { - std::lock_guard lock(dbg_lock); - std::cerr << rout << ";" << msg<< ";" << c << std::endl; - } - } - PYOPENCL_INLINE const char* - routine() const - { - return m_routine; - } - - PYOPENCL_INLINE cl_int - code() const - { - return m_code; - } - - PYOPENCL_INLINE bool - is_out_of_memory() const - { - // matches Python implementation in pyopencl/cffi_cl.py - return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE || - code() == CL_OUT_OF_RESOURCES || - code() == CL_OUT_OF_HOST_MEMORY); - } -}; - -// }}} - -// {{{ tracing and error reporting - -template -struct __CLArgGetter { - template - static PYOPENCL_INLINE auto - get(T&& clarg) -> decltype(clarg.convert()) - { - return clarg.convert(); - } -}; - -template -struct __CLFinish { - static PYOPENCL_INLINE void - call(T, bool) - { - } -}; - -template -struct __CLFinish().finish(true)))> { - static PYOPENCL_INLINE void - call(T v, bool converted) - { - v.finish(converted); - } -}; - -template -struct __CLPost { - static PYOPENCL_INLINE void - call(T) - { - } -}; - -template -struct __CLPost().post()))> { - static PYOPENCL_INLINE void - call(T v) - { - v.post(); - } -}; - -template -struct is_out_arg : std::false_type {}; - -template -struct is_out_arg::is_out> > : std::true_type {}; - -template -struct __CLPrintOut { - static PYOPENCL_INLINE void - call(T, std::ostream&) - { - } -}; - -template -struct __CLPrintOut::value> > { - static inline void - call(T v, std::ostream &stm) - { - stm << ", "; - v.print(stm, true); - } -}; - -template -struct __CLPrint { - static inline void - call(T v, std::ostream &stm, bool &&first) - { - if (!first) { - stm << ", "; - } else { - first = false; - } - if (is_out_arg::value) { - stm << "{out}"; - } - 
v.print(stm); - } -}; - -template class Caller, size_t n, typename T> -struct __CLCall { - template - static PYOPENCL_INLINE void - call(T &&t, Ts&&... ts) - { - __CLCall::call(std::forward(t), - std::forward(ts)...); - Caller(t))>::call(std::get(t), - std::forward(ts)...); - } -}; - -template class Caller, typename T> -struct __CLCall { - template - static PYOPENCL_INLINE void - call(T &&t, Ts&&... ts) - { - Caller(t))>::call(std::get<0>(t), - std::forward(ts)...); - } -}; - -template -class CLArgPack : public ArgPack { - template void - _print_trace(T &res, const char *name) - { - typename CLArgPack::tuple_base *that = this; - std::cerr << name << "("; - __CLCall<__CLPrint, sizeof...(Types) - 1, - decltype(*that)>::call(*that, std::cerr, true); - std::cerr << ") = (ret: " << res; - __CLCall<__CLPrintOut, sizeof...(Types) - 1, - decltype(*that)>::call(*that, std::cerr); - std::cerr << ")" << std::endl; - } -public: - using ArgPack::ArgPack; - template - PYOPENCL_INLINE auto - clcall(Func func, const char *name) - -> decltype(this->template call<__CLArgGetter>(func)) - { - auto res = this->template call<__CLArgGetter>(func); - if (DEBUG_ON) { - std::lock_guard lock(dbg_lock); - _print_trace(res, name); - } - return res; - } - PYOPENCL_INLINE void - finish() - { - typename CLArgPack::tuple_base *that = this; - __CLCall<__CLFinish, sizeof...(Types) - 1, - decltype(*that)>::call(*that, false); - __CLCall<__CLPost, sizeof...(Types) - 1, - decltype(*that)>::call(*that); - __CLCall<__CLFinish, sizeof...(Types) - 1, - decltype(*that)>::call(*that, true); - } -}; - -template -static PYOPENCL_INLINE CLArgPack...> -make_clargpack(Types&&... args) -{ - return CLArgPack...>(std::forward(args)...); -} - -template -static PYOPENCL_INLINE void -call_guarded(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, ArgTypes2&&... 
args) -{ - auto argpack = make_clargpack(std::forward(args)...); - cl_int status_code = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - throw clerror(name, status_code); - } - argpack.finish(); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -call_guarded(T (CL_API_CALL *func)(ArgTypes...), const char *name, ArgTypes2&&... args) -{ - cl_int status_code = CL_SUCCESS; - auto status_arg = buf_arg(status_code); - auto argpack = make_clargpack(std::forward(args)..., status_arg); - T res = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - throw clerror(name, status_code); - } - argpack.finish(); - return res; -} -#define pyopencl_call_guarded(func, ...) \ - call_guarded(func, #func, __VA_ARGS__) - -static PYOPENCL_INLINE void -cleanup_print_error(cl_int status_code, const char *name) noexcept -{ - std::cerr << ("PyOpenCL WARNING: a clean-up operation failed " - "(dead context maybe?)") << std::endl - << name << " failed with code " << status_code << std::endl; -} - -template -static PYOPENCL_INLINE void -call_guarded_cleanup(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - auto argpack = make_clargpack(std::forward(args)...); - cl_int status_code = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - cleanup_print_error(status_code, name); - } else { - argpack.finish(); - } -} -#define pyopencl_call_guarded_cleanup(func, ...) 
\ - call_guarded_cleanup(func, #func, __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE error* -c_handle_error(Func func) noexcept -{ - try { - func(); - return nullptr; - } catch (const clerror &e) { - auto err = (error*)malloc(sizeof(error)); - err->routine = strdup(e.routine()); - err->msg = strdup(e.what()); - err->code = e.code(); - err->other = 0; - return err; - } catch (const std::exception &e) { - /* non-pyopencl exceptions need to be converted as well */ - auto err = (error*)malloc(sizeof(error)); - err->other = 1; - err->msg = strdup(e.what()); - return err; - } -} - -template -static PYOPENCL_INLINE auto -retry_mem_error(Func func) -> decltype(func()) -{ - try { - return func(); - } catch (clerror &e) { - if (PYOPENCL_LIKELY(!e.is_out_of_memory()) || !py::gc()) { - throw; - } - } - return func(); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE error* -c_handle_retry_mem_error(Func &&func) noexcept -{ - return c_handle_error([&] {retry_mem_error(std::forward(func));}); -} - -// }}} - -#endif diff --git a/src/c_wrapper/event.cpp b/src/c_wrapper/event.cpp deleted file mode 100644 index d75c3a32..00000000 --- a/src/c_wrapper/event.cpp +++ /dev/null @@ -1,294 +0,0 @@ -#include "event.h" -#include "command_queue.h" -#include "context.h" -#include "pyhelper.h" - -#include - -template class clobj; -template void print_arg(std::ostream&, const cl_event&, bool); -template void print_clobj(std::ostream&, const event*); -template void print_buf(std::ostream&, const cl_event*, - size_t, ArgType, bool, bool); - -class event_private { - mutable volatile std::atomic_bool m_finished; - virtual void finish() noexcept = 0; -public: - event_private() - : m_finished(false) - {} - virtual - ~event_private() - {} - void - call_finish() noexcept - { - if (m_finished.exchange(true)) - return; - finish(); - } - bool - is_finished() noexcept - { - return m_finished; - } -}; - -event::event(cl_event event, bool retain, event_private *p) - : 
clobj(event), m_p(p) -{ - if (retain) { - try { - pyopencl_call_guarded(clRetainEvent, PYOPENCL_CL_CASTABLE_THIS); - } catch (...) { - m_p->call_finish(); - delete m_p; - throw; - } - } -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -static PYOPENCL_INLINE bool -release_private_use_cb(event *evt) -{ - try { - cl_int status = 0; - pyopencl_call_guarded(clGetEventInfo, evt, - CL_EVENT_COMMAND_EXECUTION_STATUS, - size_arg(status), nullptr); - // Event Callback may not be run immediately when the event - // is already completed. - if (status <= CL_COMPLETE) - return false; - cl_context ctx; - pyopencl_call_guarded(clGetEventInfo, evt, CL_EVENT_CONTEXT, - size_arg(ctx), nullptr); - int major; - int minor; - context::get_version(ctx, &major, &minor); - return (major > 1) || (major >= 1 && minor >= 1); - } catch (const clerror &e) { - cleanup_print_error(e.code(), e.what()); - return false; - } -} -#endif - -void -event::release_private() noexcept -{ - if (!m_p) - return; - if (m_p->is_finished()) { - delete m_p; - return; - } -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - if (release_private_use_cb(this)) { - try { - event_private *p = m_p; - set_callback(CL_COMPLETE, [p] (cl_int) { - p->call_finish(); - delete p; - }); - return; - } catch (const clerror &e) { - cleanup_print_error(e.code(), e.what()); - } - } -#endif - wait(); - delete m_p; -} - -event::~event() -{ - release_private(); - pyopencl_call_guarded_cleanup(clReleaseEvent, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -event::get_info(cl_uint param_name) const -{ - switch ((cl_event_info)param_name) { - case CL_EVENT_COMMAND_QUEUE: - return pyopencl_get_opaque_info(command_queue, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_EVENT_COMMAND_TYPE: - return pyopencl_get_int_info(cl_command_type, Event, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_EVENT_COMMAND_EXECUTION_STATUS: - return pyopencl_get_int_info(cl_int, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); - 
case CL_EVENT_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_EVENT_CONTEXT: - return pyopencl_get_opaque_info(context, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("Event.get_info", CL_INVALID_VALUE); - } -} - -generic_info -event::get_profiling_info(cl_profiling_info param) const -{ - switch (param) { - case CL_PROFILING_COMMAND_QUEUED: - case CL_PROFILING_COMMAND_SUBMIT: - case CL_PROFILING_COMMAND_START: - case CL_PROFILING_COMMAND_END: - return pyopencl_get_int_info(cl_ulong, EventProfiling, PYOPENCL_CL_CASTABLE_THIS, param); - default: - throw clerror("Event.get_profiling_info", CL_INVALID_VALUE); - } -} - -void -event::wait() const -{ - pyopencl_call_guarded(clWaitForEvents, len_arg(data())); - if (m_p) { - m_p->call_finish(); - } -} - -class nanny_event_private : public event_private { - void *m_ward; - void finish() noexcept - { - void *ward = m_ward; - m_ward = nullptr; - py::deref(ward); - } -public: - nanny_event_private(void *ward) - : m_ward(nullptr) - { - m_ward = py::ref(ward); - } - PYOPENCL_USE_RESULT PYOPENCL_INLINE void* - get_ward() const noexcept - { - return m_ward; - } -}; - -nanny_event::nanny_event(cl_event evt, bool retain, void *ward) - : event(evt, retain, ward ? new nanny_event_private(ward) : nullptr) -{ -} - -PYOPENCL_USE_RESULT void* -nanny_event::get_ward() const noexcept -{ - return (get_p() ? 
static_cast(get_p())->get_ward() : - nullptr); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -class user_event : public event { -public: - using event::event; - PYOPENCL_INLINE void - set_status(cl_int status) - { - pyopencl_call_guarded(clSetUserEventStatus, PYOPENCL_CL_CASTABLE_THIS, status); - } -}; -#endif - -// c wrapper - -// Event -error* -event__get_profiling_info(clobj_t _evt, cl_profiling_info param, - generic_info *out) -{ - auto evt = static_cast(_evt); - return c_handle_error([&] { - *out = evt->get_profiling_info(param); - }); -} - -error* -event__wait(clobj_t evt) -{ - return c_handle_error([&] { - static_cast(evt)->wait(); - }); -} - - -error* -event__set_callback(clobj_t _evt, cl_int type, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - auto evt = static_cast(_evt); - return c_handle_error([&] { - pyobj = py::ref(pyobj); - try { - evt->set_callback(type, [=] (cl_int status) { - py::call(pyobj, status); - py::deref(pyobj); - }); - } catch (...) 
{ - py::deref(pyobj); - } - }); -#else - PYOPENCL_UNSUPPORTED(clSetEventCallback, "CL 1.0 and below and Windows") -#endif -} - -// Nanny Event -void* -nanny_event__get_ward(clobj_t evt) -{ - return static_cast(evt)->get_ward(); -} - -error* -wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clWaitForEvents, wait_for); - }); -} - -error* -enqueue_wait_for_events(clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueWaitForEvents, queue, wait_for); - }); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 - -error* -create_user_event(clobj_t *_evt, clobj_t _ctx) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto evt = pyopencl_call_guarded(clCreateUserEvent, ctx); - *_evt = pyopencl_convert_obj(user_event, clReleaseEvent, evt); - }); -} - -error* -user_event__set_status(clobj_t _evt, cl_int status) -{ - auto evt = static_cast(_evt); - return c_handle_error([&] { - evt->set_status(status); - }); -} - -#endif diff --git a/src/c_wrapper/event.h b/src/c_wrapper/event.h deleted file mode 100644 index c6d0dd4b..00000000 --- a/src/c_wrapper/event.h +++ /dev/null @@ -1,87 +0,0 @@ -#include "clhelper.h" -#include - -#ifndef __PYOPENCL_EVENT_H -#define __PYOPENCL_EVENT_H - -// {{{ event - -extern template class clobj; -extern template void print_arg(std::ostream&, const cl_event&, bool); -extern template void print_buf(std::ostream&, const cl_event*, - size_t, ArgType, bool, bool); - -class event_private; - -class event : public clobj { - event_private *m_p; - // return whether the event need to be released. 
- void release_private() noexcept; -protected: - PYOPENCL_INLINE event_private* - get_p() const - { - return m_p; - } -public: - PYOPENCL_DEF_CL_CLASS(EVENT); - event(cl_event event, bool retain, event_private *p=nullptr); - ~event(); - generic_info get_info(cl_uint param) const; - PYOPENCL_USE_RESULT generic_info - get_profiling_info(cl_profiling_info param) const; - void wait() const; -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - template - PYOPENCL_INLINE void - set_callback(cl_int type, Func &&_func) - { - auto func = new rm_ref_t(std::forward(_func)); - try { - pyopencl_call_guarded( - clSetEventCallback, PYOPENCL_CL_CASTABLE_THIS, type, - static_cast( - [] (cl_event, cl_int status, void *data) { - rm_ref_t *func = static_cast*>(data); - - // We won't necessarily be able to acquire the GIL inside this - // handler without deadlocking. Create a thread that *can* - // wait. - - std::thread t([func, status] () { - (*func)(status); - delete func; - }); - t.detach(); - - }), (void*)func); - } catch (...) 
{ - delete func; - throw; - } - } -#endif -}; -static PYOPENCL_INLINE auto -event_out(clobj_t *ret) -> decltype(pyopencl_outarg(event, ret, clReleaseEvent)) -{ - return pyopencl_outarg(event, ret, clReleaseEvent); -} - -extern template void print_clobj(std::ostream&, const event*); - -class nanny_event : public event { -public: - nanny_event(cl_event evt, bool retain, void *ward=nullptr); - PYOPENCL_USE_RESULT void *get_ward() const noexcept; -}; -static PYOPENCL_INLINE auto -nanny_event_out(clobj_t *ret, void *ward) - -> decltype(pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward)) -{ - return pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward); -} - -// }}} - -#endif diff --git a/src/c_wrapper/function.h b/src/c_wrapper/function.h deleted file mode 100644 index 5d1a604c..00000000 --- a/src/c_wrapper/function.h +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include - -#ifndef __PYOPENCL_FUNCTION_H -#define __PYOPENCL_FUNCTION_H - -#if defined __GNUC__ && __GNUC__ > 3 -#define PYOPENCL_INLINE inline __attribute__((__always_inline__)) -#else -#define PYOPENCL_INLINE inline -#endif - -template -using rm_ref_t = typename std::remove_reference::type; -template -using rm_const_t = typename std::remove_const::type; -template -using enable_if_t = typename std::enable_if::type; - -template -struct seq { -}; - -template -struct gens : gens { -}; - -template -struct gens<0, S...> { - typedef seq type; -}; - -template -static PYOPENCL_INLINE auto -_call_func(Function func, seq, std::tuple &args) - -> decltype(func(std::forward(std::get(args))...)) -{ - return func(static_cast(std::get(args))...); -} - -template -static PYOPENCL_INLINE auto -call_tuple(Function &&func, T &&args) - -> decltype(_call_func(std::forward(func), - typename gens::value>::type(), - args)) -{ - return _call_func(std::forward(func), - typename gens::value>::type(), args); -} - -template class Convert, typename... 
Types> -using _ArgPackBase = std::tuple::type>...>; - -template class Convert, typename... Types> -class ArgPack : public _ArgPackBase { -public: - typedef _ArgPackBase tuple_base; -private: - template - static PYOPENCL_INLINE std::tuple - ensure_tuple(T &&v) - { - return std::tuple(std::forward(v)); - } - template - static PYOPENCL_INLINE std::tuple - ensure_tuple(std::tuple &&t) - { - return t; - } - - template - using ArgConvert = Convert >; - template class Getter, int... S> - PYOPENCL_INLINE auto - __get(seq) -#ifndef _MSC_VER - -> decltype(std::tuple_cat( - ensure_tuple(Getter >::get( - std::get(*(tuple_base*)this)))...)) -#endif - { - return std::tuple_cat( - ensure_tuple(Getter >::get( - std::get(*(tuple_base*)this)))...); - } -public: - template - ArgPack(Types2&&... arg_orig) - : tuple_base(ArgConvert >(arg_orig)...) - { - } - ArgPack(ArgPack &&other) - : tuple_base(static_cast(other)) - { - } - // GCC Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57543 - template class Getter> - PYOPENCL_INLINE auto - get() -> decltype(this->__get( - typename gens::type())) - { - return __get(typename gens::type()); - } - template class Getter, typename Func> - PYOPENCL_INLINE auto - call(Func func) -> decltype(call_tuple(func, this->get())) - { - return call_tuple(func, this->get()); - } -}; - -template class Convert, typename... Types> -static PYOPENCL_INLINE ArgPack...> -make_argpack(Types&&... 
args) -{ - return ArgPack...>(std::forward(args)...); -} - -#endif diff --git a/src/c_wrapper/gl_obj.cpp b/src/c_wrapper/gl_obj.cpp deleted file mode 100644 index bd7edf31..00000000 --- a/src/c_wrapper/gl_obj.cpp +++ /dev/null @@ -1,155 +0,0 @@ -#include "gl_obj.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "clhelper.h" - -#ifdef HAVE_GL - -template void print_clobj(std::ostream&, const gl_buffer*); -template void print_clobj(std::ostream&, - const gl_renderbuffer*); - -generic_info -gl_texture::get_gl_texture_info(cl_gl_texture_info param_name) const -{ - switch (param_name) { - case CL_GL_TEXTURE_TARGET: - return pyopencl_get_int_info(GLenum, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_GL_MIPMAP_LEVEL: - return pyopencl_get_int_info(GLint, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); - } -} - -typedef cl_int (CL_API_CALL *clEnqueueGLObjectFunc)(cl_command_queue, cl_uint, - const cl_mem*, cl_uint, - const cl_event*, cl_event*); - -static PYOPENCL_INLINE void -enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, - clobj_t *evt, command_queue *cq, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, - uint32_t num_wait_for) -{ - const auto _wait_for = buf_from_class(wait_for, num_wait_for); - const auto _mem_objs = buf_from_class( - mem_objects, num_mem_objects); - call_guarded(func, name, cq, _mem_objs, _wait_for, event_out(evt)); -} -#define enqueue_gl_objects(what, ...) 
\ - enqueue_gl_objects(clEnqueue##what##GLObjects, \ - "clEnqueue" #what "GLObjects", __VA_ARGS__) - -// c wrapper - -error* -create_from_gl_texture(clobj_t *ptr, clobj_t _ctx, cl_mem_flags flags, - GLenum texture_target, GLint miplevel, - GLuint texture) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLTexture, - ctx, flags, texture_target, miplevel, texture); - *ptr = pyopencl_convert_obj(gl_texture, clReleaseMemObject, mem); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateFromGLTexture, "CL 1.1") -#endif -} - -error* -create_from_gl_buffer(clobj_t *ptr, clobj_t _ctx, - cl_mem_flags flags, GLuint bufobj) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLBuffer, - ctx, flags, bufobj); - *ptr = pyopencl_convert_obj(gl_buffer, clReleaseMemObject, mem); - }); -} - -error* -create_from_gl_renderbuffer(clobj_t *ptr, clobj_t _ctx, - cl_mem_flags flags, GLuint bufobj) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLRenderbuffer, - ctx, flags, bufobj); - *ptr = pyopencl_convert_obj(gl_renderbuffer, - clReleaseMemObject, mem); - }); -} - -error* -enqueue_acquire_gl_objects(clobj_t *evt, clobj_t queue, - const clobj_t *mem_objects, - uint32_t num_mem_objects, - const clobj_t *wait_for, uint32_t num_wait_for) -{ - return c_handle_error([&] { - enqueue_gl_objects( - Acquire, evt, static_cast(queue), - mem_objects, num_mem_objects, wait_for, num_wait_for); - }); -} - -error* -enqueue_release_gl_objects(clobj_t *evt, clobj_t queue, - const clobj_t *mem_objects, - uint32_t num_mem_objects, - const clobj_t *wait_for, uint32_t num_wait_for) -{ - return c_handle_error([&] { - enqueue_gl_objects( - Release, evt, static_cast(queue), - mem_objects, num_mem_objects, wait_for, num_wait_for); - }); -} - -error* -get_gl_object_info(clobj_t mem, 
cl_gl_object_type *otype, GLuint *gl_name) -{ - auto globj = static_cast(mem); - return c_handle_error([&] { - pyopencl_call_guarded(clGetGLObjectInfo, globj, buf_arg(*otype), - buf_arg(*gl_name)); - }); -} - -#endif - -int -have_gl() -{ -#ifdef HAVE_GL - return 1; -#else - return 0; -#endif -} - -cl_context_properties -get_apple_cgl_share_group() -{ -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) - #ifdef HAVE_GL - CGLContextObj kCGLContext = CGLGetCurrentContext(); - CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); - - return (cl_context_properties)kCGLShareGroup; - #else - throw clerror("get_apple_cgl_share_group unavailable: " - "GL interop not compiled", - CL_INVALID_VALUE); - #endif -#else - throw clerror("get_apple_cgl_share_group unavailable: non-Apple platform", - CL_INVALID_VALUE); -#endif /* __APPLE__ */ -} diff --git a/src/c_wrapper/gl_obj.h b/src/c_wrapper/gl_obj.h deleted file mode 100644 index 9f47e19b..00000000 --- a/src/c_wrapper/gl_obj.h +++ /dev/null @@ -1,46 +0,0 @@ -#include "image.h" - -#ifndef __PYOPENCL_GL_OBJ_H -#define __PYOPENCL_GL_OBJ_H - -#ifdef HAVE_GL - -// {{{ gl interop - -class gl_buffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(GL_BUFFER); - PYOPENCL_INLINE - gl_buffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} -}; - -class gl_renderbuffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(GL_RENDERBUFFER); - PYOPENCL_INLINE - gl_renderbuffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} -}; - -extern template void print_clobj(std::ostream&, const gl_buffer*); -extern template void print_clobj(std::ostream&, - const gl_renderbuffer*); - -class gl_texture : public image { - public: - PYOPENCL_INLINE - gl_texture(cl_mem mem, bool retain) - : image(mem, retain) - {} - PYOPENCL_USE_RESULT generic_info - get_gl_texture_info(cl_gl_texture_info param_name) const; -}; - -// }}} - -#endif - -#endif diff --git a/src/c_wrapper/image.cpp 
b/src/c_wrapper/image.cpp deleted file mode 100644 index 6f571f32..00000000 --- a/src/c_wrapper/image.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include "image.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "buffer.h" - -template void print_clobj(std::ostream&, const image*); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE image* -new_image(cl_mem mem, const cl_image_format *fmt) -{ - return pyopencl_convert_obj(image, clReleaseMemObject, mem, fmt); -} - -generic_info -image::get_image_info(cl_image_info param) const -{ - switch (param) { - case CL_IMAGE_FORMAT: - return pyopencl_get_int_info(cl_image_format, Image, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_IMAGE_ELEMENT_SIZE: - case CL_IMAGE_ROW_PITCH: - case CL_IMAGE_SLICE_PITCH: - case CL_IMAGE_WIDTH: - case CL_IMAGE_HEIGHT: - case CL_IMAGE_DEPTH: -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_IMAGE_ARRAY_SIZE: -#endif - return pyopencl_get_int_info(size_t, Image, PYOPENCL_CL_CASTABLE_THIS, param); - -#if PYOPENCL_CL_VERSION >= 0x1020 - // TODO: - // case CL_IMAGE_BUFFER: - // { - // cl_mem param_value; - // PYOPENCL_CALL_GUARDED(clGetImageInfo, (this, param, sizeof(param_value), ¶m_value, 0)); - // if (param_value == 0) - // { - // // no associated memory object? no problem. 
- // return py::object(); - // } - // return create_mem_object_wrapper(param_value); - // } - case CL_IMAGE_NUM_MIP_LEVELS: - case CL_IMAGE_NUM_SAMPLES: - return pyopencl_get_int_info(cl_uint, Image, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Image.get_image_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Image -error* -create_image_2d(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t pitch, void *buf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage2D, ctx, flags, fmt, - width, height, pitch, buf); - *img = new_image(mem, fmt); - }); -} - -error* -create_image_3d(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t depth, size_t pitch_x, size_t pitch_y, void *buf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage3D, ctx, flags, fmt, - width, height, depth, pitch_x, - pitch_y, buf); - *img = new_image(mem, fmt); - }); -} - - -error* -create_image_from_desc(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, cl_image_desc *desc, void *buf) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage, ctx, flags, fmt, - desc, buf); - *img = new_image(mem, fmt); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateImage, "CL 1.1 and below") -#endif -} - - -error* -image__get_image_info(clobj_t _img, cl_image_info param, generic_info *out) -{ - auto img = static_cast(_img); - return c_handle_error([&] { - *out = img->get_image_info(param); - }); -} - -type_t -image__get_fill_type(clobj_t img) -{ - return static_cast(img)->get_fill_type(); -} - -error* -enqueue_read_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const size_t *_orig, size_t orig_l, - const size_t *_reg, 
size_t reg_l, void *buf, - size_t row_pitch, size_t slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueReadImage, queue, img, bool(block), - orig, reg, row_pitch, slice_pitch, buf, - wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_copy_image(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, - const size_t *_src_orig, size_t src_orig_l, - const size_t *_dst_orig, size_t dst_orig_l, - const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - ConstBuffer src_orig(_src_orig, src_orig_l); - ConstBuffer dst_orig(_dst_orig, dst_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyImage, queue, src, dst, src_orig, - dst_orig, reg, wait_for, event_out(evt)); - }); -} - -error* -enqueue_write_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, - const void *buf, size_t row_pitch, size_t slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueWriteImage, queue, img, bool(block), - orig, reg, row_pitch, slice_pitch, buf, - wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* 
-enqueue_fill_image(clobj_t *evt, clobj_t _queue, clobj_t mem, - const void *color, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - // TODO debug color - auto queue = static_cast(_queue); - auto img = static_cast(mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueFillImage, queue, img, color, orig, - reg, wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueFillImage, "CL 1.1 and below") -#endif -} - -// {{{ image transfers - -error* -enqueue_copy_image_to_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, size_t offset, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyImageToBuffer, queue, src, dst, - orig, reg, offset, wait_for, event_out(evt)); - }); -} - -error* -enqueue_copy_buffer_to_image(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, size_t offset, const size_t *_orig, - size_t orig_l, const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyBufferToImage, queue, src, dst, - offset, orig, reg, wait_for, event_out(evt)); 
- }); -} - -// }}} diff --git a/src/c_wrapper/image.h b/src/c_wrapper/image.h deleted file mode 100644 index 7d29909c..00000000 --- a/src/c_wrapper/image.h +++ /dev/null @@ -1,50 +0,0 @@ -#include "memory_object.h" -#include "clhelper.h" - -#ifndef __PYOPENCL_IMAGE_H -#define __PYOPENCL_IMAGE_H - -// {{{ image - -class image : public memory_object { -private: - cl_image_format m_format; -public: - PYOPENCL_DEF_CL_CLASS(IMAGE); - PYOPENCL_INLINE - image(cl_mem mem, bool retain, const cl_image_format *fmt=0) - : memory_object(mem, retain), m_format(fmt ? *fmt : cl_image_format()) - {} - PYOPENCL_INLINE const cl_image_format& - format() - { - if (!m_format.image_channel_data_type) { - pyopencl_call_guarded(clGetImageInfo, PYOPENCL_CL_CASTABLE_THIS, CL_IMAGE_FORMAT, - size_arg(m_format), nullptr); - } - return m_format; - } - PYOPENCL_USE_RESULT generic_info get_image_info(cl_image_info param) const; - PYOPENCL_INLINE type_t - get_fill_type() - { - switch (format().image_channel_data_type) { - case CL_SIGNED_INT8: - case CL_SIGNED_INT16: - case CL_SIGNED_INT32: - return TYPE_INT; - case CL_UNSIGNED_INT8: - case CL_UNSIGNED_INT16: - case CL_UNSIGNED_INT32: - return TYPE_UINT; - default: - return TYPE_FLOAT; - } - } -}; - -extern template void print_clobj(std::ostream&, const image*); - -// }}} - -#endif diff --git a/src/c_wrapper/kernel.cpp b/src/c_wrapper/kernel.cpp deleted file mode 100644 index 817e1061..00000000 --- a/src/c_wrapper/kernel.cpp +++ /dev/null @@ -1,213 +0,0 @@ -#include "kernel.h" -#include "context.h" -#include "device.h" -#include "program.h" -#include "memory_object.h" -#include "sampler.h" -#include "command_queue.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_kernel&, bool); -template void print_clobj(std::ostream&, const kernel*); -template void print_buf(std::ostream&, const cl_kernel*, - size_t, ArgType, bool, bool); - -kernel::~kernel() -{ - 
pyopencl_call_guarded_cleanup(clReleaseKernel, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -kernel::get_info(cl_uint param) const -{ - switch ((cl_kernel_info)param) { - case CL_KERNEL_FUNCTION_NAME: - return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_NUM_ARGS: - case CL_KERNEL_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_CONTEXT: - return pyopencl_get_opaque_info(context, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_PROGRAM: - return pyopencl_get_opaque_info(program, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_KERNEL_ATTRIBUTES: - return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Kernel.get_info", CL_INVALID_VALUE); - } -} - -generic_info -kernel::get_work_group_info(cl_kernel_work_group_info param, - const device *dev) const -{ - switch (param) { -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: -#endif - case CL_KERNEL_WORK_GROUP_SIZE: - return pyopencl_get_int_info(size_t, KernelWorkGroup, PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: - return pyopencl_get_array_info(size_t, KernelWorkGroup, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_KERNEL_LOCAL_MEM_SIZE: -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_KERNEL_PRIVATE_MEM_SIZE: -#endif - return pyopencl_get_int_info(cl_ulong, KernelWorkGroup, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - default: - throw clerror("Kernel.get_work_group_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -PYOPENCL_USE_RESULT generic_info -kernel::get_arg_info(cl_uint idx, cl_kernel_arg_info param) const -{ - switch (param) { - case CL_KERNEL_ARG_ADDRESS_QUALIFIER: - return pyopencl_get_int_info(cl_kernel_arg_address_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case 
CL_KERNEL_ARG_ACCESS_QUALIFIER: - return pyopencl_get_int_info(cl_kernel_arg_access_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case CL_KERNEL_ARG_TYPE_QUALIFIER: - return pyopencl_get_int_info(cl_kernel_arg_type_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case CL_KERNEL_ARG_TYPE_NAME: - case CL_KERNEL_ARG_NAME: - return pyopencl_get_str_info(KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - default: - throw clerror("Kernel.get_arg_info", CL_INVALID_VALUE); - } -} -#endif - -// c wrapper - -// Kernel -error* -create_kernel(clobj_t *knl, clobj_t _prog, const char *name) -{ - auto prog = static_cast(_prog); - return c_handle_error([&] { - *knl = new kernel(pyopencl_call_guarded(clCreateKernel, prog, - name), false); - }); -} - -error* -kernel__set_arg_null(clobj_t _knl, cl_uint arg_index) -{ - auto knl = static_cast(_knl); - return c_handle_error([&] { - const cl_mem m = 0; - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, size_arg(m)); - }); -} - -error* -kernel__set_arg_mem(clobj_t _knl, cl_uint arg_index, clobj_t _mem) -{ - auto knl = static_cast(_knl); - auto mem = static_cast(_mem); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(mem->data())); - }); -} - -error* -kernel__set_arg_sampler(clobj_t _knl, cl_uint arg_index, clobj_t _samp) -{ - auto knl = static_cast(_knl); - auto samp = static_cast(_samp); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(samp->data())); - }); -} - -error* -kernel__set_arg_buf(clobj_t _knl, cl_uint arg_index, - const void *buffer, size_t size) -{ - auto knl = static_cast(_knl); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(buffer, size)); - }); -} - -error* -kernel__set_arg_svm_pointer(clobj_t _knl, cl_uint arg_index, void *value) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto knl = static_cast(_knl); - return c_handle_error([&] { - 
pyopencl_call_guarded(clSetKernelArgSVMPointer, knl, arg_index, value); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clSetKernelArgSVMPointer, "CL 2.0") -#endif -} - -error* -kernel__get_work_group_info(clobj_t _knl, cl_kernel_work_group_info param, - clobj_t _dev, generic_info *out) -{ - auto knl = static_cast(_knl); - auto dev = static_cast(_dev); - return c_handle_error([&] { - *out = knl->get_work_group_info(param, dev); - }); -} - -error* -kernel__get_arg_info(clobj_t _knl, cl_uint idx, cl_kernel_arg_info param, - generic_info *out) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto knl = static_cast(_knl); - return c_handle_error([&] { - *out = knl->get_arg_info(idx, param); - }); -#else - PYOPENCL_UNSUPPORTED(clKernelGetArgInfo, "CL 1.1 and below") -#endif -} - -error* -enqueue_nd_range_kernel(clobj_t *evt, clobj_t _queue, clobj_t _knl, - cl_uint work_dim, const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto knl = static_cast(_knl); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueNDRangeKernel, queue, knl, work_dim, - global_work_offset, global_work_size, - local_work_size, wait_for, event_out(evt)); - }); -} - -error* -enqueue_task(clobj_t *evt, clobj_t _queue, clobj_t _knl, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto knl = static_cast(_knl); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueTask, queue, knl, wait_for, - event_out(evt)); - }); -} diff --git a/src/c_wrapper/kernel.h b/src/c_wrapper/kernel.h deleted file mode 100644 index 5db1a0cc..00000000 --- a/src/c_wrapper/kernel.h +++ /dev/null @@ -1,44 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_KERNEL_H -#define __PYOPENCL_KERNEL_H - 
-class device; - -// {{{ kernel - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_kernel&, bool); -extern template void print_buf(std::ostream&, const cl_kernel*, - size_t, ArgType, bool, bool); - -class kernel : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(KERNEL); - PYOPENCL_INLINE - kernel(cl_kernel knl, bool retain) - : clobj(knl) - { - if (retain) { - pyopencl_call_guarded(clRetainKernel, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~kernel(); - generic_info get_info(cl_uint param) const; - - PYOPENCL_USE_RESULT generic_info - get_work_group_info(cl_kernel_work_group_info param, - const device *dev) const; - -#if PYOPENCL_CL_VERSION >= 0x1020 - PYOPENCL_USE_RESULT generic_info - get_arg_info(cl_uint idx, cl_kernel_arg_info param) const; -#endif -}; - -extern template void print_clobj(std::ostream&, const kernel*); - -// }}} - -#endif diff --git a/src/c_wrapper/memory_map.cpp b/src/c_wrapper/memory_map.cpp deleted file mode 100644 index 068274df..00000000 --- a/src/c_wrapper/memory_map.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "memory_map.h" -#include "image.h" -#include "buffer.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, void *const&, bool); -template void print_buf(std::ostream&, void *const*, - size_t, ArgType, bool, bool); - -memory_map::~memory_map() -{ - if (!m_valid.exchange(false)) - return; - pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, m_queue, - m_mem, PYOPENCL_CL_CASTABLE_THIS, 0, nullptr, nullptr); -} - -void -memory_map::release(clobj_t *evt, const command_queue *queue, - const clobj_t *_wait_for, uint32_t num_wait_for) const -{ - if (!m_valid.exchange(false)) { - throw clerror("MemoryMap.release", CL_INVALID_VALUE, - "trying to double-unref mem map"); - } - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - queue = queue ? 
queue : &m_queue; - pyopencl_call_guarded(clEnqueueUnmapMemObject, queue, - m_mem, PYOPENCL_CL_CASTABLE_THIS, wait_for, event_out(evt)); -} - -generic_info -memory_map::get_info(cl_uint) const -{ - throw clerror("MemoryMap.get_info", CL_INVALID_VALUE); -} - -intptr_t -memory_map::intptr() const -{ - return m_valid ? (intptr_t)data() : 0; -} - -memory_map* -convert_memory_map(clobj_t evt, command_queue *queue, - memory_object *buf, void *res) -{ - try { - return new memory_map(queue, buf, res); - } catch (...) { - delete evt; - pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, queue, - buf, res, 0, nullptr, nullptr); - throw; - } -} - -// c wrapper - -// Memory Map -error* -memory_map__release(clobj_t _map, clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for, clobj_t *evt) -{ - auto map = static_cast(_map); - auto queue = static_cast(_queue); - return c_handle_error([&] { - map->release(evt, queue, _wait_for, num_wait_for); - }); -} - -void* -memory_map__data(clobj_t _map) -{ - return static_cast(_map)->data(); -} - -error* -enqueue_map_image(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, - cl_map_flags flags, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, size_t *row_pitch, - size_t *slice_pitch, const clobj_t *_wait_for, - uint32_t num_wait_for, int block) -{ - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - void *res = pyopencl_call_guarded( - clEnqueueMapImage, queue, img, bool(block), flags, orig, - reg, row_pitch, slice_pitch, wait_for, event_out(evt)); - *map = convert_memory_map(*evt, queue, img, res); - }); -} - -error* -enqueue_map_buffer(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, - cl_map_flags flags, size_t offset, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block) -{ 
- auto queue = static_cast(_queue); - auto buf = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - void *res = pyopencl_call_guarded( - clEnqueueMapBuffer, queue, buf, bool(block), - flags, offset, size, wait_for, event_out(evt)); - *map = convert_memory_map(*evt, queue, buf, res); - }); -} diff --git a/src/c_wrapper/memory_map.h b/src/c_wrapper/memory_map.h deleted file mode 100644 index 65a988a9..00000000 --- a/src/c_wrapper/memory_map.h +++ /dev/null @@ -1,37 +0,0 @@ -#include "error.h" -#include "command_queue.h" -#include "memory_object.h" - -#ifndef __PYOPENCL_MEMORY_MAP_H -#define __PYOPENCL_MEMORY_MAP_H - -class event; - -// {{{ memory_map - -extern template class clobj; -extern template void print_arg(std::ostream&, void *const&, bool); -extern template void print_buf(std::ostream&, void *const*, - size_t, ArgType, bool, bool); - -class memory_map : public clobj { -private: - mutable volatile std::atomic_bool m_valid; - command_queue m_queue; - memory_object m_mem; -public: - constexpr static const char *class_name = "MEMORY_MAP"; - PYOPENCL_INLINE - memory_map(const command_queue *queue, const memory_object *mem, void *ptr) - : clobj(ptr), m_valid(true), m_queue(*queue), m_mem(*mem) - {} - ~memory_map(); - void release(clobj_t *evt, const command_queue *queue, - const clobj_t *wait_for, uint32_t num_wait_for) const; - generic_info get_info(cl_uint) const; - intptr_t intptr() const; -}; - -// }}} - -#endif diff --git a/src/c_wrapper/memory_object.cpp b/src/c_wrapper/memory_object.cpp deleted file mode 100644 index 6f1ba321..00000000 --- a/src/c_wrapper/memory_object.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "memory_object.h" -#include "context.h" -#include "event.h" -#include "command_queue.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_mem&, bool); -template void print_buf(std::ostream&, const cl_mem*, - size_t, ArgType, 
bool, bool); - -generic_info -memory_object::get_info(cl_uint param_name) const -{ - switch ((cl_mem_info)param_name) { - case CL_MEM_TYPE: - return pyopencl_get_int_info(cl_mem_object_type, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_FLAGS: - return pyopencl_get_int_info(cl_mem_flags, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_SIZE: - return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_HOST_PTR: - throw clerror("MemoryObject.get_info", CL_INVALID_VALUE, - "Use MemoryObject.get_host_array to get " - "host pointer."); - case CL_MEM_MAP_COUNT: - case CL_MEM_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_CONTEXT: - return pyopencl_get_opaque_info(context, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); - -#if PYOPENCL_CL_VERSION >= 0x1010 - // TODO - // case CL_MEM_ASSOCIATED_MEMOBJECT: - // { - // cl_mem param_value; - // PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, (this, param_name, sizeof(param_value), ¶m_value, 0)); - // if (param_value == 0) - // { - // // no associated memory object? no problem. 
- // return py::object(); - // } - - // return create_mem_object_wrapper(param_value); - // } - case CL_MEM_OFFSET: - return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_MEM_USES_SVM_POINTER: - return pyopencl_get_int_info(cl_bool, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("MemoryObject.get_info", CL_INVALID_VALUE); - } -} - -memory_object::~memory_object() -{ - if (!m_valid.exchange(false)) - return; - pyopencl_call_guarded_cleanup(clReleaseMemObject, PYOPENCL_CL_CASTABLE_THIS); -} - -// c wrapper - -// Memory Object -error* -memory_object__release(clobj_t obj) -{ - return c_handle_error([&] { - static_cast(obj)->release(); - }); -} - -error* -memory_object__get_host_array(clobj_t _obj, void **hostptr, size_t *size) -{ - auto obj = static_cast(_obj); - return c_handle_error([&] { - cl_mem_flags flags; - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_FLAGS, - size_arg(flags), nullptr); - if (!(flags & CL_MEM_USE_HOST_PTR)) - throw clerror("MemoryObject.get_host_array", CL_INVALID_VALUE, - "Only MemoryObject with USE_HOST_PTR " - "is supported."); - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_HOST_PTR, - size_arg(*hostptr), nullptr); - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_SIZE, - size_arg(*size), nullptr); - }); -} - -error* -enqueue_migrate_mem_objects(clobj_t *evt, clobj_t _queue, - const clobj_t *_mem_obj, uint32_t num_mem_obj, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - const auto mem_obj = buf_from_class(_mem_obj, num_mem_obj); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueMigrateMemObjects, queue, - mem_obj, flags, wait_for, event_out(evt)); - }); -#else - 
PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueMigrateMemObjects, "CL 1.2") -#endif -} diff --git a/src/c_wrapper/memory_object.h b/src/c_wrapper/memory_object.h deleted file mode 100644 index 635dc470..00000000 --- a/src/c_wrapper/memory_object.h +++ /dev/null @@ -1,56 +0,0 @@ -#include "error.h" -#include - -#ifndef __PYOPENCL_MEMORY_OBJECT_H -#define __PYOPENCL_MEMORY_OBJECT_H - -// {{{ memory_object - -extern template class clobj; -extern template void print_arg(std::ostream&, const cl_mem&, bool); -extern template void print_buf(std::ostream&, const cl_mem*, - size_t, ArgType, bool, bool); - -class memory_object : public clobj { -private: - mutable volatile std::atomic_bool m_valid; -public: - constexpr static const char *class_name = "MEMORY_OBJECT"; - PYOPENCL_INLINE - memory_object(cl_mem mem, bool retain) - : clobj(mem), m_valid(true) - { - if (retain) { - pyopencl_call_guarded(clRetainMemObject, PYOPENCL_CL_CASTABLE_THIS); - } - } - PYOPENCL_INLINE - memory_object(const memory_object &mem) - : memory_object(mem.data(), true) - {} - ~memory_object(); - generic_info get_info(cl_uint param_name) const; - void - release() const - { - if (PYOPENCL_UNLIKELY(!m_valid.exchange(false))) { - throw clerror("MemoryObject.release", CL_INVALID_VALUE, - "trying to double-unref mem object"); - } - pyopencl_call_guarded(clReleaseMemObject, PYOPENCL_CL_CASTABLE_THIS); - } -#if 0 - PYOPENCL_USE_RESULT size_t - size() const - { - size_t param_value; - pyopencl_call_guarded(clGetMemObjectInfo, this, CL_MEM_SIZE, - size_arg(param_value), nullptr); - return param_value; - } -#endif -}; - -// }}} - -#endif diff --git a/src/c_wrapper/mingw-std-threads b/src/c_wrapper/mingw-std-threads deleted file mode 160000 index 776ce7fa..00000000 --- a/src/c_wrapper/mingw-std-threads +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 776ce7faf9368ec9588ee77458799c281cb25737 diff --git a/src/c_wrapper/platform.cpp b/src/c_wrapper/platform.cpp deleted file mode 100644 index 21a896b2..00000000 --- 
a/src/c_wrapper/platform.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "platform.h" -#include "device.h" -#include "clhelper.h" - -#include - -template class clobj; -template void print_arg(std::ostream&, - const cl_platform_id&, bool); -template void print_clobj(std::ostream&, const platform*); -template void print_buf(std::ostream&, const cl_platform_id*, - size_t, ArgType, bool, bool); - -generic_info -platform::get_info(cl_uint param_name) const -{ - switch ((cl_platform_info)param_name) { - case CL_PLATFORM_PROFILE: - case CL_PLATFORM_VERSION: - case CL_PLATFORM_NAME: - case CL_PLATFORM_VENDOR: -#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - case CL_PLATFORM_EXTENSIONS: -#endif - return pyopencl_get_str_info(Platform, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("Platform.get_info", CL_INVALID_VALUE); - } -} - -void -platform::get_version(cl_platform_id plat, int *major, int *minor) -{ - char s_buff[128]; - size_t size; - pyopencl_buf d_buff(0); - char *name = s_buff; - pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, - 0, nullptr, buf_arg(size)); - if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { - d_buff.resize(size); - name = d_buff.get(); - } - pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, - size_arg(name, size), buf_arg(size)); - *major = *minor = -1; - sscanf(name, "OpenCL %d.%d", major, minor); - // Well, hopefully there won't be a negative OpenCL version =) - if (*major < 0 || *minor < 0) { - throw clerror("Platform.get_version", CL_INVALID_VALUE, - "platform returned non-conformant " - "platform version string"); - } -} - -// c wrapper - -error* -get_platforms(clobj_t **_platforms, uint32_t *num_platforms) -{ - return c_handle_error([&] { - *num_platforms = 0; - pyopencl_call_guarded(clGetPlatformIDs, 0, nullptr, - buf_arg(*num_platforms)); - pyopencl_buf platforms(*num_platforms); - pyopencl_call_guarded(clGetPlatformIDs, platforms, - buf_arg(*num_platforms)); - 
*_platforms = buf_to_base(platforms).release(); - }); -} - -error* -platform__get_devices(clobj_t _plat, clobj_t **_devices, - uint32_t *num_devices, cl_device_type devtype) -{ - auto plat = static_cast(_plat); - return c_handle_error([&] { - *num_devices = 0; - try { - pyopencl_call_guarded(clGetDeviceIDs, plat, devtype, 0, nullptr, - buf_arg(*num_devices)); - } catch (const clerror &e) { - if (e.code() != CL_DEVICE_NOT_FOUND) - throw e; - *num_devices = 0; - } - if (*num_devices == 0) { - *_devices = nullptr; - return; - } - pyopencl_buf devices(*num_devices); - pyopencl_call_guarded(clGetDeviceIDs, plat, devtype, devices, - buf_arg(*num_devices)); - *_devices = buf_to_base(devices).release(); - }); -} - -error* -platform__unload_compiler(clobj_t plat) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - return c_handle_error([&] { - pyopencl_call_guarded(clUnloadPlatformCompiler, - static_cast(plat)); - }); -#else - PYOPENCL_UNSUPPORTED(clUnloadPlatformCompiler, "CL 1.1 and below") -#endif -} diff --git a/src/c_wrapper/platform.h b/src/c_wrapper/platform.h deleted file mode 100644 index 1bad5c29..00000000 --- a/src/c_wrapper/platform.h +++ /dev/null @@ -1,27 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_PLATFORM_H -#define __PYOPENCL_PLATFORM_H - -// {{{ platform - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_platform_id&, bool); -extern template void print_buf( - std::ostream&, const cl_platform_id*, size_t, ArgType, bool, bool); - -class platform : public clobj { -public: - static void get_version(cl_platform_id plat, int *major, int *minor); - using clobj::clobj; - PYOPENCL_DEF_CL_CLASS(PLATFORM); - - generic_info get_info(cl_uint param_name) const; -}; - -extern template void print_clobj(std::ostream&, const platform*); - -// }}} - -#endif diff --git a/src/c_wrapper/program.cpp b/src/c_wrapper/program.cpp deleted file mode 100644 index a0535c06..00000000 --- a/src/c_wrapper/program.cpp +++ /dev/null @@ -1,269 +0,0 @@ 
-#include "program.h" -#include "device.h" -#include "context.h" -#include "clhelper.h" -#include "kernel.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_program&, bool); -template void print_clobj(std::ostream&, const program*); -template void print_buf(std::ostream&, const cl_program*, - size_t, ArgType, bool, bool); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE program* -new_program(cl_program prog, program_kind_type progkind=KND_UNKNOWN) -{ - return pyopencl_convert_obj(program, clReleaseProgram, prog, progkind); -} - -program::~program() -{ - pyopencl_call_guarded_cleanup(clReleaseProgram, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -program::get_info(cl_uint param) const -{ - switch ((cl_program_info)param) { - case CL_PROGRAM_CONTEXT: - return pyopencl_get_opaque_info(context, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_REFERENCE_COUNT: - case CL_PROGRAM_NUM_DEVICES: - return pyopencl_get_int_info(cl_uint, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_DEVICES: - return pyopencl_get_opaque_array_info(device, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_SOURCE: - return pyopencl_get_str_info(Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_BINARY_SIZES: - return pyopencl_get_array_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_BINARIES: { - auto sizes = pyopencl_get_vec_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, - CL_PROGRAM_BINARY_SIZES); - pyopencl_buf result_ptrs(sizes.len()); - for (size_t i = 0;i < sizes.len();i++) { - result_ptrs[i] = (char*)malloc(sizes[i]); - } - try { - pyopencl_call_guarded(clGetProgramInfo, PYOPENCL_CL_CASTABLE_THIS, CL_PROGRAM_BINARIES, - sizes.len() * sizeof(char*), - result_ptrs.get(), nullptr); - } catch (...) 
{ - for (size_t i = 0;i < sizes.len();i++) { - free(result_ptrs[i]); - } - } - pyopencl_buf gis(sizes.len()); - for (size_t i = 0;i < sizes.len();i++) { - gis[i] = make_generic_info( - CLASS_NONE, - _copy_str(std::string("char[") + tostring(sizes[i]) + "]"), - true, - result_ptrs[i], - true); - } - return pyopencl_convert_array_info(generic_info, gis); - } - -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_PROGRAM_NUM_KERNELS: - return pyopencl_get_int_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_KERNEL_NAMES: - return pyopencl_get_str_info(Program, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Program.get_info", CL_INVALID_VALUE); - } -} - -generic_info -program::get_build_info(const device *dev, cl_program_build_info param) const -{ - switch (param) { - case CL_PROGRAM_BUILD_STATUS: - return pyopencl_get_int_info(cl_build_status, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_PROGRAM_BUILD_OPTIONS: - case CL_PROGRAM_BUILD_LOG: - return pyopencl_get_str_info(ProgramBuild, PYOPENCL_CL_CASTABLE_THIS, dev, param); -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_PROGRAM_BINARY_TYPE: - return pyopencl_get_int_info(cl_program_binary_type, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: - return pyopencl_get_int_info(size_t, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); -#endif - default: - throw clerror("Program.get_build_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -void -program::compile(const char *opts, const clobj_t *_devs, size_t num_devs, - const clobj_t *_prgs, const char *const *names, - size_t num_hdrs) -{ - const auto devs = buf_from_class(_devs, num_devs); - const auto prgs = buf_from_class(_prgs, num_hdrs); - pyopencl_call_guarded(clCompileProgram, PYOPENCL_CL_CASTABLE_THIS, devs, opts, prgs, - buf_arg(names, num_hdrs), nullptr, nullptr); -} -#endif - 
-pyopencl_buf -program::all_kernels() -{ - cl_uint num_knls; - pyopencl_call_guarded(clCreateKernelsInProgram, PYOPENCL_CL_CASTABLE_THIS, 0, nullptr, - buf_arg(num_knls)); - pyopencl_buf knls(num_knls); - pyopencl_call_guarded(clCreateKernelsInProgram, PYOPENCL_CL_CASTABLE_THIS, knls, - buf_arg(num_knls)); - return buf_to_base(knls, true); -} - -// c wrapper - -// Program -error* -create_program_with_source(clobj_t *prog, clobj_t _ctx, const char *_src) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - const auto &src = _src; - const size_t length = strlen(src); - cl_program result = pyopencl_call_guarded( - clCreateProgramWithSource, ctx, len_arg(src), buf_arg(length)); - *prog = new_program(result, KND_SOURCE); - }); -} - -error* -create_program_with_il(clobj_t *prog, clobj_t _ctx, void *il, size_t length) -{ -#if PYOPENCL_CL_VERSION >= 0x2010 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_program result = pyopencl_call_guarded( - clCreateProgramWithIL, ctx, il, length); - *prog = new_program(result, KND_SOURCE); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clCreateProgramWithIL, "CL 2.1") -#endif -} - -error* -create_program_with_binary(clobj_t *prog, clobj_t _ctx, - cl_uint num_devices, const clobj_t *devices, - const unsigned char **binaries, size_t *binary_sizes) -{ - auto ctx = static_cast(_ctx); - const auto devs = buf_from_class(devices, num_devices); - pyopencl_buf binary_statuses(num_devices); - return c_handle_error([&] { - cl_program result = pyopencl_call_guarded( - clCreateProgramWithBinary, ctx, devs, - binary_sizes, binaries, binary_statuses.get()); - // for (cl_uint i = 0; i < num_devices; ++i) - // std::cout << i << ":" << binary_statuses[i] << std::endl; - *prog = new_program(result, KND_BINARY); - }); -} - -error* -program__build(clobj_t _prog, const char *options, - cl_uint num_devices, const clobj_t *_devices) -{ - auto prog = static_cast(_prog); - const auto devices = buf_from_class(_devices, num_devices); 
- return c_handle_error([&] { - pyopencl_call_guarded(clBuildProgram, prog, devices, options, - nullptr, nullptr); - }); -} - -error* -program__kind(clobj_t prog, int *kind) -{ - return c_handle_error([&] { - *kind = static_cast(prog)->kind(); - }); -} - -error* -program__get_build_info(clobj_t _prog, clobj_t _dev, - cl_program_build_info param, generic_info *out) -{ - auto prog = static_cast(_prog); - auto dev = static_cast(_dev); - return c_handle_error([&] { - *out = prog->get_build_info(dev, param); - }); -} - -error* -program__create_with_builtin_kernels(clobj_t *_prg, clobj_t _ctx, - const clobj_t *_devs, uint32_t num_devs, - const char *names) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto devs = buf_from_class(_devs, num_devs); - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto prg = pyopencl_call_guarded(clCreateProgramWithBuiltInKernels, - ctx, devs, names); - *_prg = new_program(prg); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateProgramWithBuiltInKernels, "CL 1.1 and below") -#endif -} - -error* -program__compile(clobj_t _prg, const char *opts, const clobj_t *_devs, - size_t num_devs, const clobj_t *_prgs, - const char *const *names, size_t num_hdrs) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto prg = static_cast(_prg); - return c_handle_error([&] { - prg->compile(opts, _devs, num_devs, _prgs, names, num_hdrs); - }); -#else - PYOPENCL_UNSUPPORTED(clCompileProgram, "CL 1.1 and below") -#endif -} - -error* -program__link(clobj_t *_prg, clobj_t _ctx, const clobj_t *_prgs, - size_t num_prgs, const char *opts, const clobj_t *_devs, - size_t num_devs) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto devs = buf_from_class(_devs, num_devs); - const auto prgs = buf_from_class(_prgs, num_prgs); - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto prg = pyopencl_call_guarded(clLinkProgram, ctx, devs, opts, - prgs, nullptr, nullptr); - *_prg = new_program(prg); - }); -#else - PYOPENCL_UNSUPPORTED(clLinkProgram, "CL 1.1 and 
below") -#endif -} - -error* -program__all_kernels(clobj_t _prg, clobj_t **_knl, uint32_t *size) -{ - auto prg = static_cast(_prg); - return c_handle_error([&] { - auto knls = prg->all_kernels(); - *size = knls.len(); - *_knl = knls.release(); - }); -} diff --git a/src/c_wrapper/program.h b/src/c_wrapper/program.h deleted file mode 100644 index 63d2fc76..00000000 --- a/src/c_wrapper/program.h +++ /dev/null @@ -1,58 +0,0 @@ -#include "clhelper.h" - -#ifndef __PYOPENCL_PROGRAM_H -#define __PYOPENCL_PROGRAM_H - -class device; - -// {{{ program - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_program&, bool); -extern template void print_buf(std::ostream&, const cl_program*, - size_t, ArgType, bool, bool); - -class program : public clobj { -private: - program_kind_type m_program_kind; - -public: - PYOPENCL_DEF_CL_CLASS(PROGRAM); - PYOPENCL_INLINE - program(cl_program prog, bool retain, - program_kind_type progkind=KND_UNKNOWN) - : clobj(prog), m_program_kind(progkind) - { - if (retain) { - pyopencl_call_guarded(clRetainProgram, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~program(); - PYOPENCL_USE_RESULT PYOPENCL_INLINE program_kind_type - kind() const - { - return m_program_kind; - } - PYOPENCL_USE_RESULT pyopencl_buf - get_info__devices() const - { - return pyopencl_get_vec_info(cl_device_id, Program, PYOPENCL_CL_CASTABLE_THIS, - CL_PROGRAM_DEVICES); - } - generic_info get_info(cl_uint param_name) const; - PYOPENCL_USE_RESULT generic_info - get_build_info(const device *dev, cl_program_build_info param_name) const; -#if PYOPENCL_CL_VERSION >= 0x1020 - void compile(const char *opts, const clobj_t *_devs, size_t num_devs, - const clobj_t *_prgs, const char *const *names, - size_t num_hdrs); -#endif - pyopencl_buf all_kernels(); -}; - -extern template void print_clobj(std::ostream&, const program*); - -// }}} - -#endif diff --git a/src/c_wrapper/pyhelper.cpp b/src/c_wrapper/pyhelper.cpp deleted file mode 100644 index 7397d12b..00000000 
--- a/src/c_wrapper/pyhelper.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "pyhelper.h" - -namespace py { -WrapFunc gc; -WrapFunc ref; -WrapFunc deref; -WrapFunc call; -} - -void -set_py_funcs(int (*_gc)(), void *(*_ref)(void*), void (*_deref)(void*), - void (*_call)(void*, cl_int)) -{ - py::gc = _gc; - py::ref = _ref; - py::deref = _deref; - py::call = _call; -} diff --git a/src/c_wrapper/pyhelper.h b/src/c_wrapper/pyhelper.h deleted file mode 100644 index 50c08402..00000000 --- a/src/c_wrapper/pyhelper.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __PYOPENCL_PYHELPER_H -#define __PYOPENCL_PYHELPER_H - -#include "wrap_cl.h" -#include "function.h" - -template -class WrapFunc; - -template -class WrapFunc { - typedef Ret (*_FuncType)(Args...); - _FuncType m_func; - static PYOPENCL_INLINE _FuncType - check_func(_FuncType f) - { - return f ? f : ([] (Args...) {return Ret();}); - } -public: - WrapFunc(_FuncType func=nullptr) - : m_func(check_func(func)) - {} - Ret - operator()(Args... args) - { - return m_func(std::forward(args)...); - } - WrapFunc& - operator=(_FuncType func) - { - m_func = check_func(func); - return *this; - } -}; - -namespace py { -extern WrapFunc gc; -extern WrapFunc ref; -extern WrapFunc deref; -extern WrapFunc call; -} - -#endif diff --git a/src/c_wrapper/pyopencl_ext.h b/src/c_wrapper/pyopencl_ext.h deleted file mode 100644 index a9792d8b..00000000 --- a/src/c_wrapper/pyopencl_ext.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _PYOPENCL_EXT_H -#define _PYOPENCL_EXT_H - -#ifdef PYOPENCL_USE_SHIPPED_EXT - -#include "clinfo_ext.h" - -#else - -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) - -#include - -#else - -#include -#include - -#endif - -#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD -#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 - -typedef union -{ - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; -} cl_device_topology_amd; -#endif - -/* {{{ these 
NV defines are often missing from the system headers */ - -#ifndef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 -#endif -#ifndef CL_DEVICE_INTEGRATED_MEMORY_NV -#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 -#endif - -#ifndef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV -#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 -#endif - -#ifndef CL_DEVICE_PCI_BUS_ID_NV -#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 -#endif - -#ifndef CL_DEVICE_PCI_SLOT_ID_NV -#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 -#endif - -/* }}} */ - -#endif - -#endif - -/* vim: foldmethod=marker */ diff --git a/src/c_wrapper/sampler.cpp b/src/c_wrapper/sampler.cpp deleted file mode 100644 index b373c783..00000000 --- a/src/c_wrapper/sampler.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "sampler.h" -#include "context.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_sampler&, bool); -template void print_clobj(std::ostream&, const sampler*); -template void print_buf(std::ostream&, const cl_sampler*, - size_t, ArgType, bool, bool); - -sampler::~sampler() -{ - pyopencl_call_guarded_cleanup(clReleaseSampler, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -sampler::get_info(cl_uint param_name) const -{ - switch ((cl_sampler_info)param_name) { - case CL_SAMPLER_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_CONTEXT: - return pyopencl_get_opaque_info(context, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_ADDRESSING_MODE: - return pyopencl_get_int_info(cl_addressing_mode, Sampler, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_FILTER_MODE: - return pyopencl_get_int_info(cl_filter_mode, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_NORMALIZED_COORDS: - return pyopencl_get_int_info(cl_bool, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - -#if PYOPENCL_CL_VERSION >= 0x2000 - // TODO: 
MIP_FILTER_MODE, LOD_MIN, LOD_MAX -#endif - - default: - throw clerror("Sampler.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Sampler -error* -create_sampler(clobj_t *samp, clobj_t _ctx, int norm_coords, - cl_addressing_mode am, cl_filter_mode fm) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - *samp = new sampler(pyopencl_call_guarded(clCreateSampler, ctx, - norm_coords, am, fm), - false); - }); -} diff --git a/src/c_wrapper/sampler.h b/src/c_wrapper/sampler.h deleted file mode 100644 index 404b82e5..00000000 --- a/src/c_wrapper/sampler.h +++ /dev/null @@ -1,33 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_SAMPLER_H -#define __PYOPENCL_SAMPLER_H - -// {{{ sampler - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_sampler&, bool); -extern template void print_buf(std::ostream&, const cl_sampler*, - size_t, ArgType, bool, bool); - -class sampler : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(SAMPLER); - PYOPENCL_INLINE - sampler(cl_sampler samp, bool retain) - : clobj(samp) - { - if (retain) { - pyopencl_call_guarded(clRetainSampler, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~sampler(); - generic_info get_info(cl_uint param_name) const; -}; - -extern template void print_clobj(std::ostream&, const sampler*); - -// }}} - -#endif diff --git a/src/c_wrapper/svm.cpp b/src/c_wrapper/svm.cpp deleted file mode 100644 index 8452ec99..00000000 --- a/src/c_wrapper/svm.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "context.h" -#include "command_queue.h" -#include "event.h" - -error* -svm_alloc( - clobj_t _ctx, cl_mem_flags flags, size_t size, cl_uint alignment, - void **result) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - *result = clSVMAlloc(ctx->data(), flags, size, alignment); - if (!*result) - throw clerror("clSVMalloc", CL_INVALID_VALUE, - "(allocation failure, unspecified reason)"); - }); -#else - 
PYOPENCL_UNSUPPORTED_BEFORE(clSVMAlloc, "CL 2.0") -#endif -} - - -error* -svm_free(clobj_t _ctx, void *svm_pointer) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto ctx = static_cast(_ctx); - // no error returns (?!) - clSVMFree(ctx->data(), svm_pointer); - return nullptr; -#else - PYOPENCL_UNSUPPORTED_BEFORE(clSVMFree, "CL 2.0") -#endif -} - - -error* -enqueue_svm_free( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - void *svm_pointers[], - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMFree, queue, - num_svm_pointers, svm_pointers, - /* pfn_free_func*/ nullptr, - /* user_data */ nullptr, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMFree, "CL 2.0") -#endif -} - - -error* -enqueue_svm_memcpy( - clobj_t *evt, clobj_t _queue, - cl_bool is_blocking, - void *dst_ptr, const void *src_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMemcpy, queue, - is_blocking, - dst_ptr, src_ptr, size, - wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMemcpy, "CL 2.0") -#endif -} - - -error* -enqueue_svm_memfill( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const void *pattern, size_t pattern_size, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMemFill, queue, - svm_ptr, - pattern, 
pattern_size, size, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMemFill, "CL 2.0") -#endif -} - - -error* -enqueue_svm_map( - clobj_t *evt, clobj_t _queue, - cl_bool blocking_map, cl_map_flags map_flags, - void *svm_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMap, queue, - blocking_map, map_flags, - svm_ptr, size, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMap, "CL 2.0") -#endif -} - - -error* -enqueue_svm_unmap( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMUnmap, queue, - svm_ptr, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMUnmap, "CL 2.0") -#endif -} - - -error* -enqueue_svm_migrate_mem( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - const void **svm_pointers, - const size_t *sizes, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMigrateMem, queue, - num_svm_pointers, svm_pointers, sizes, flags, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMigrateMem, "CL 2.1") -#endif -} diff --git a/src/c_wrapper/svm.h b/src/c_wrapper/svm.h deleted file mode 100644 index c0e39ec4..00000000 --- a/src/c_wrapper/svm.h +++ /dev/null @@ -1,4 +0,0 @@ 
-#ifndef __PYOPENCL_SVM_H -#define __PYOPENCL_SVM_H - -#endif diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h deleted file mode 100644 index d1bbb7d0..00000000 --- a/src/c_wrapper/utils.h +++ /dev/null @@ -1,551 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" -#include "debug.h" - -#include -#include -#include -#include - -#ifndef __PYOPENCL_UTILS_H -#define __PYOPENCL_UTILS_H - -#if (defined(__GNUC__) && (__GNUC__ > 2)) -# define PYOPENCL_EXPECT(exp, var) __builtin_expect(exp, var) -#else -# define PYOPENCL_EXPECT(exp, var) (exp) -#endif - -#define PYOPENCL_LIKELY(x) PYOPENCL_EXPECT(bool(x), true) -#define PYOPENCL_UNLIKELY(x) PYOPENCL_EXPECT(bool(x), false) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE std::string -tostring(const T& v) -{ - std::ostringstream ostr; - ostr << v; - return ostr.str(); -} - -template -struct CLGenericArgPrinter { - static PYOPENCL_INLINE void - print(std::ostream &stm, T &arg) - { - stm << arg; - } -}; - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE void* -cl_memdup(const void *p, size_t size) -{ - void *res = malloc(size); - memcpy(res, p, size); - return res; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T* -cl_memdup(const T *p) -{ - // Not supported by libstdc++ yet... 
- // static_assert(std::is_trivially_copy_constructible::value); - return static_cast(cl_memdup(static_cast(p), sizeof(T))); -} - -enum class ArgType { - None, - SizeOf, - Length, -}; - -template -struct type_size : std::integral_constant {}; -template -struct type_size, void>::value> > : - std::integral_constant {}; - -template -static PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const T *p, size_t len) -{ - if (len > 1) { - stm << "["; - } - for (size_t i = 0;i < len;i++) { - CLGenericArgPrinter::print(stm, p[i]); - if (i != len - 1) { - stm << ", "; - } - } - if (len > 1) { - stm << "]"; - } -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const char *p, size_t len) -{ - dbg_print_str(stm, p, len); -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, - const unsigned char *p, size_t len) -{ - dbg_print_bytes(stm, p, len); -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const void *p, size_t len) -{ - dbg_print_bytes(stm, static_cast(p), len); -} - -template -void -print_buf(std::ostream &stm, const T *p, size_t len, - ArgType arg_type, bool content, bool out) -{ - const size_t ele_size = type_size::value; - if (out) { - stm << "*(" << (const void*)p << "): "; - if (p) { - _print_buf_content(stm, p, len); - } else { - stm << "NULL"; - } - } else { - bool need_quote = content || arg_type != ArgType::None; - if (content) { - if (p) { - _print_buf_content(stm, p, len); - stm << " "; - } else { - stm << "NULL "; - } - } - if (need_quote) { - stm << "<"; - } - switch (arg_type) { - case ArgType::SizeOf: - stm << ele_size * len << ", "; - break; - case ArgType::Length: - stm << len << ", "; - break; - default: - break; - } - stm << (const void*)p; - if (need_quote) { - stm << ">"; - } - } -} - -template -void -print_arg(std::ostream &stm, const T &v, bool out) -{ - if (!out) { - stm << (const void*)&v; - } else { - stm << "*(" << (const void*)&v << "): " << v; - } -} 
-extern template void print_buf(std::ostream&, const char*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_int*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_uint*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_long*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_ulong*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, - const cl_image_format*, size_t, - ArgType, bool, bool); - -template<> -struct CLGenericArgPrinter { - static PYOPENCL_INLINE void - print(std::ostream &stm, std::nullptr_t&) - { - stm << (void*)nullptr; - } -}; - -template -struct CLGenericArgPrinter< - T, enable_if_t >::value || - std::is_same >::value> > { - static PYOPENCL_INLINE void - print(std::ostream &stm, const char *str) - { - dbg_print_str(stm, str); - } -}; - -template -class CLArg { -private: - T &m_arg; -public: - CLArg(T &arg) noexcept - : m_arg(arg) - {} - CLArg(CLArg &&other) noexcept - : m_arg(other.m_arg) - {} - PYOPENCL_INLINE T& - convert() noexcept - { - return m_arg; - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - CLGenericArgPrinter::print(stm, m_arg); - } -}; - -template<> -class CLArg : public CLArg { - cl_bool m_arg; -public: - CLArg(bool arg) noexcept - : CLArg(m_arg), m_arg(arg ? CL_TRUE : CL_FALSE) - {} - CLArg(CLArg &&other) noexcept - : CLArg(bool(other.m_arg)) - {} - PYOPENCL_INLINE void - print(std::ostream &stm) - { - stm << (m_arg ? 
"true" : "false"); - } -}; - -template -class ArgBuffer { -private: - T *m_buf; - size_t m_len; -protected: - PYOPENCL_INLINE void - set(T *buf) noexcept - { - m_buf = buf; - } -public: - typedef T type; - constexpr static ArgType arg_type = AT; - ArgBuffer(T *buf, size_t l) noexcept - : m_buf(buf), m_len(l) - {} - ArgBuffer(ArgBuffer &&other) noexcept - : ArgBuffer(other.m_buf, other.m_len) - {} - PYOPENCL_INLINE rm_const_t* - get() const noexcept - { - return const_cast*>(m_buf); - } - template - PYOPENCL_INLINE T2& - operator[](int i) const - { - return m_buf[i]; - } - PYOPENCL_INLINE size_t - len() const noexcept - { - return m_len; - } -}; - -template -struct _ToArgBuffer { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &buf) - { - return ArgBuffer, AT>(&buf, 1); - } -}; - -template -static PYOPENCL_INLINE auto -buf_arg(T &&buf) -> decltype(_ToArgBuffer::convert(std::forward(buf))) -{ - return _ToArgBuffer::convert(std::forward(buf)); -} - -template -static PYOPENCL_INLINE ArgBuffer -buf_arg(T *buf, size_t l) -{ - return ArgBuffer(buf, l); -} - -template -static PYOPENCL_INLINE auto -size_arg(T&&... buf) - -> decltype(buf_arg(std::forward(buf)...)) -{ - return buf_arg(std::forward(buf)...); -} - -template -static PYOPENCL_INLINE auto -len_arg(T&&... 
buf) - -> decltype(buf_arg(std::forward(buf)...)) -{ - return buf_arg(std::forward(buf)...); -} - -template -struct _ArgBufferConverter; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) -> decltype(buff.get()) - { - return buff.get(); - } -}; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) - -> decltype(std::make_tuple(type_size::value * - buff.len(), buff.get())) - { - return std::make_tuple(type_size::value * - buff.len(), buff.get()); - } -}; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) -> decltype(std::make_tuple(buff.len(), buff.get())) - { - return std::make_tuple(buff.len(), buff.get()); - } -}; - -template -class CLArg, - Buff>::value> > { -private: - Buff &m_buff; -public: - constexpr static bool is_out = !std::is_const::value; - CLArg(Buff &buff) noexcept - : m_buff(buff) - {} - CLArg(CLArg &&other) noexcept - : m_buff(other.m_buff) - {} - PYOPENCL_INLINE auto - convert() const noexcept - -> decltype(_ArgBufferConverter::convert(m_buff)) - { - return _ArgBufferConverter::convert(m_buff); - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) - { - print_buf(stm, m_buff.get(), m_buff.len(), - Buff::arg_type, out || !is_out, out); - } -}; - -template -class ConstBuffer : public ArgBuffer { -private: - T m_intern_buf[n]; - ConstBuffer(ConstBuffer&&) = delete; - ConstBuffer() = delete; -public: - ConstBuffer(const T *buf, size_t l, T content=0) - : ArgBuffer(buf, n) - { - if (l < n) { - memcpy(m_intern_buf, buf, type_size::value * l); - for (size_t i = l;i < n;i++) { - m_intern_buf[i] = content; - } - this->set(m_intern_buf); - } - } -}; - -struct OutArg { -}; - -template -class CLArg::value> > { -private: - bool m_converted; - bool m_need_cleanup; - T &m_arg; -public: - constexpr static bool is_out = true; - CLArg(T &arg) - : m_converted(false), m_need_cleanup(false), m_arg(arg) - { - } - CLArg(CLArg 
&&other) noexcept - : m_converted(other.m_converted), m_need_cleanup(other.m_need_cleanup), - m_arg(other.m_arg) - { - other.m_need_cleanup = false; - } - PYOPENCL_INLINE auto - convert() -> decltype(m_arg.get()) - { - return m_arg.get(); - } - PYOPENCL_INLINE void - finish(bool converted) noexcept - { - m_need_cleanup = !converted; - } - PYOPENCL_INLINE void - post() - { - m_arg.convert(); - m_converted = true; - } - ~CLArg() - { - if (m_need_cleanup) { - m_arg.cleanup(m_converted); - } - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) - { - m_arg.print(stm, out); - } -}; - -template -struct _D { - void operator()(T *p) { - free((void*)p); - } -}; - -template -class pyopencl_buf : public std::unique_ptr > { - size_t m_len; -public: - PYOPENCL_INLINE - pyopencl_buf(size_t len=1) - : std::unique_ptr >((T*)(len ? malloc(sizeof(T) * (len + 1)) : - nullptr)), m_len(len) - { - if (len) { - memset((void*)this->get(), 0, (len + 1) * sizeof(T)); - } - } - PYOPENCL_INLINE size_t - len() const - { - return m_len; - } - PYOPENCL_INLINE T& - operator[](int i) - { - return this->get()[i]; - } - PYOPENCL_INLINE const T& - operator[](int i) const - { - return this->get()[i]; - } - PYOPENCL_INLINE void - resize(size_t len) - { - if (len == m_len) - return; - m_len = len; - this->reset((T*)realloc((void*)this->release(), - (len + 1) * sizeof(T))); - } -}; - -template -using pyopencl_buf_ele_t = typename rm_ref_t::element_type; - -template -struct is_pyopencl_buf : std::false_type {}; - -template -struct is_pyopencl_buf< - T, enable_if_t >, - rm_ref_t >::value> > : std::true_type {}; - -template -struct _ToArgBuffer::value && - std::is_const >::value> > { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &&buf) - { - return ArgBuffer, AT>(buf.get(), buf.len()); - } -}; - -template -struct _ToArgBuffer::value && - !std::is_const >::value> > { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &&buf) - { - return ArgBuffer, AT>(buf.get(), buf.len()); - } -}; - 
-template -using __pyopencl_buf_arg_type = - rm_ref_t()))>; - -template -class CLArg::value> > - : public CLArg<__pyopencl_buf_arg_type > { - typedef __pyopencl_buf_arg_type BufType; - BufType m_buff; -public: - PYOPENCL_INLINE - CLArg(Buff &buff) noexcept - : CLArg(m_buff), m_buff(len_arg(buff)) - {} - PYOPENCL_INLINE - CLArg(CLArg &&other) noexcept - : CLArg(m_buff), m_buff(std::move(other.m_buff)) - {} -}; - -// FIXME -PYOPENCL_USE_RESULT static PYOPENCL_INLINE char* -_copy_str(const std::string& str) -{ - return strdup(str.c_str()); -} - -#endif diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp deleted file mode 100644 index 1e001eb4..00000000 --- a/src/c_wrapper/wrap_cl.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "pyhelper.h" -#include "clhelper.h" -#include "platform.h" -#include "device.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "memory_object.h" -#include "image.h" -#include "gl_obj.h" -#include "memory_map.h" -#include "buffer.h" -#include "sampler.h" -#include "program.h" -#include "kernel.h" - -template void print_buf(std::ostream&, const char*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_int*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_uint*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_long*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_ulong*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, - const cl_image_format*, size_t, - ArgType, bool, bool); - -// {{{ c wrapper - -// Generic functions -int -get_cl_version() -{ - return PYOPENCL_CL_VERSION; -} - -void -free_pointer(void *p) -{ - free(p); -} - -void -free_pointer_array(void **p, uint32_t size) -{ - for (uint32_t i = 0;i < size;i++) { - free(p[i]); - } -} - - -intptr_t -clobj__int_ptr(clobj_t obj) -{ - return PYOPENCL_LIKELY(obj) ? 
obj->intptr() : 0l; -} - -static PYOPENCL_INLINE clobj_t -_from_int_ptr(intptr_t ptr, class_t class_, bool retain) -{ - switch(class_) { - case CLASS_PLATFORM: - return clobj_from_int_ptr(ptr, retain); - case CLASS_DEVICE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_KERNEL: - return clobj_from_int_ptr(ptr, retain); - case CLASS_CONTEXT: - return clobj_from_int_ptr(ptr, retain); - case CLASS_COMMAND_QUEUE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_BUFFER: - return clobj_from_int_ptr(ptr, retain); - case CLASS_PROGRAM: - return clobj_from_int_ptr(ptr, retain); - case CLASS_EVENT: - return clobj_from_int_ptr(ptr, retain); - case CLASS_IMAGE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_SAMPLER: - return clobj_from_int_ptr(ptr, retain); -#ifdef HAVE_GL - case CLASS_GL_BUFFER: - return clobj_from_int_ptr(ptr, retain); - case CLASS_GL_RENDERBUFFER: - return clobj_from_int_ptr(ptr, retain); -#endif - default: - throw clerror("unknown class", CL_INVALID_VALUE); - } -} - -error* -clobj__from_int_ptr(clobj_t *out, intptr_t ptr, class_t class_, int retain) -{ - return c_handle_error([&] { - *out = _from_int_ptr(ptr, class_, retain); - }); -} - -error* -clobj__get_info(clobj_t obj, cl_uint param, generic_info *out) -{ - return c_handle_error([&] { - if (PYOPENCL_UNLIKELY(!obj)) { - throw clerror("NULL input", CL_INVALID_VALUE); - } - *out = obj->get_info(param); - }); -} - -void -clobj__delete(clobj_t obj) -{ - delete obj; -} - -// }}} - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl.h b/src/c_wrapper/wrap_cl.h deleted file mode 100644 index 21ff9c08..00000000 --- a/src/c_wrapper/wrap_cl.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef _WRAP_CL_H -#define _WRAP_CL_H - - -// CL 1.2 undecided: -// clSetPrintfCallback - -// {{{ includes - -#include - -#include "pyopencl_ext.h" - -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS - -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) - -// {{{ Mac - -#define 
PYOPENCL_HAVE_EVENT_SET_CALLBACK - -#ifdef HAVE_GL - -#define PYOPENCL_GL_SHARING_VERSION 1 - -#include -#include -#include -#endif -// }}} - -#else - -// {{{ elsewhere - -#if defined(_WIN32) - -// {{{ Windows - -#define NOMINMAX -#include -#define strdup _strdup -#define strcasecmp _stricmp - -#if _MSC_VER >= 1900 /* VS 2015 and higher */ -#define PYOPENCL_HAVE_EVENT_SET_CALLBACK -#endif - -// }}} - -#else - -// {{{ non-Windows - -#include -#define PYOPENCL_HAVE_EVENT_SET_CALLBACK - -// }}} - -#endif - -#ifdef HAVE_GL -#include -#include -#endif - -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) -#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing -#endif - -// }}} - -#endif - -// }}} - - -// {{{ version handling - -#ifdef PYOPENCL_PRETEND_CL_VERSION -#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION -#else - -#if defined(CL_VERSION_2_2) -#define PYOPENCL_CL_VERSION 0x2020 -#elif defined(CL_VERSION_2_1) -#define PYOPENCL_CL_VERSION 0x2010 -#elif defined(CL_VERSION_2_0) -#define PYOPENCL_CL_VERSION 0x2000 -#elif defined(CL_VERSION_1_2) -#define PYOPENCL_CL_VERSION 0x1020 -#elif defined(CL_VERSION_1_1) -#define PYOPENCL_CL_VERSION 0x1010 -#else -#define PYOPENCL_CL_VERSION 0x1000 -#endif - -#endif - -// }}} - -#ifndef CL_VERSION_2_0 -typedef void* CLeglImageKHR; -typedef void* CLeglDisplayKHR; -typedef void* CLeglSyncKHR; -typedef intptr_t cl_egl_image_properties_khr; -typedef cl_bitfield cl_device_svm_capabilities; -typedef cl_bitfield cl_svm_mem_flags; -typedef intptr_t cl_pipe_properties; -typedef cl_uint cl_pipe_info; -typedef cl_bitfield cl_sampler_properties; -typedef cl_uint cl_kernel_exec_info; -#endif - -#ifndef CL_VERSION_1_2 -typedef intptr_t cl_device_partition_property; -typedef cl_uint cl_kernel_arg_info; - -typedef struct _cl_image_desc { - cl_mem_object_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t image_row_pitch; - size_t image_slice_pitch; - cl_uint 
num_mip_levels; - cl_uint num_samples; - cl_mem buffer; -} cl_image_desc; - -typedef cl_bitfield cl_mem_migration_flags; -#endif - -#ifndef CL_VERSION_1_1 -typedef struct _cl_buffer_region { - size_t origin; - size_t size; -} cl_buffer_region; -#endif - -#ifndef cl_ext_migrate_memobject -typedef cl_bitfield cl_mem_migration_flags_ext; -#endif - -struct clbase; -typedef clbase *clobj_t; - -#ifdef __cplusplus -extern "C" { -#endif - -#include "wrap_cl_core.h" - -#ifdef HAVE_GL -#include "wrap_cl_gl_core.h" -#endif - -#ifdef __cplusplus -} -#endif - -#if defined __GNUC__ || defined __GNUG__ -#define PYOPENCL_USE_RESULT __attribute__((warn_unused_result)) -#else -#define PYOPENCL_USE_RESULT -#endif - -#endif - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h deleted file mode 100644 index 184cd001..00000000 --- a/src/c_wrapper/wrap_cl_core.h +++ /dev/null @@ -1,399 +0,0 @@ -// Interface between C and Python - -struct clbase; -typedef struct clbase *clobj_t; - -// {{{ types - -typedef enum { - TYPE_FLOAT, - TYPE_INT, - TYPE_UINT, -} type_t; - -typedef enum { - KND_UNKNOWN, - KND_SOURCE, - KND_BINARY -} program_kind_type; - -typedef struct { - const char *routine; - const char *msg; - cl_int code; - int other; -} error; - -typedef enum { - CLASS_NONE, - CLASS_PLATFORM, - CLASS_DEVICE, - CLASS_KERNEL, - CLASS_CONTEXT, - CLASS_BUFFER, - CLASS_PROGRAM, - CLASS_EVENT, - CLASS_COMMAND_QUEUE, - CLASS_GL_BUFFER, - CLASS_GL_RENDERBUFFER, - CLASS_IMAGE, - CLASS_SAMPLER -} class_t; - -typedef struct { - class_t opaque_class; - const char *type; - bool free_type; - void *value; - bool free_value; -} generic_info; - -// }}} - -// {{{ generic functions - -int get_cl_version(); -void free_pointer(void*); -void free_pointer_array(void**, uint32_t size); -void set_py_funcs(int (*_gc)(), void *(*_ref)(void*), void (*_deref)(void*), - void (*_call)(void*, cl_int)); -int have_gl(); - -unsigned bitlog2(unsigned long v); -void 
populate_constants(void(*add)(const char*, const char*, int64_t value)); -int get_debug(); -void set_debug(int debug); - -// }}} - -// {{{ platform - -error *get_platforms(clobj_t **ptr_platforms, uint32_t *num_platforms); -error *platform__get_devices(clobj_t platform, clobj_t **ptr_devices, - uint32_t *num_devices, cl_device_type devtype); -error *platform__unload_compiler(clobj_t plat); - -// }}} - -// {{{ device -error *device__create_sub_devices(clobj_t _dev, clobj_t **_devs, - uint32_t *num_devices, - const cl_device_partition_property *props); - -// }}} - -// {{{ context - -error *create_context(clobj_t *ctx, const cl_context_properties *props, - cl_uint num_devices, const clobj_t *ptr_devices); -error *create_context_from_type(clobj_t *_ctx, - const cl_context_properties *props, - cl_device_type dev_type); -error *context__get_supported_image_formats(clobj_t context, cl_mem_flags flags, - cl_mem_object_type image_type, - generic_info *out); - -// }}} - -// {{{ command Queue - -error *create_command_queue(clobj_t *queue, clobj_t context, clobj_t device, - cl_command_queue_properties properties); -error *command_queue__finish(clobj_t queue); -error *command_queue__flush(clobj_t queue); - -// }}} - -// {{{ buffer -error *create_buffer(clobj_t *buffer, clobj_t context, cl_mem_flags flags, - size_t size, void *hostbuf); -error *buffer__get_sub_region(clobj_t *_sub_buf, clobj_t _buf, size_t orig, - size_t size, cl_mem_flags flags); - -// }}} - -// {{{ memory object - -error *memory_object__release(clobj_t obj); -error *memory_object__get_host_array(clobj_t, void **hostptr, size_t *size); - -// }}} - -// {{{ memory map - -error *memory_map__release(clobj_t _map, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for, - clobj_t *evt); -void *memory_map__data(clobj_t _map); - -// }}} - -// {{{ svm - -error* svm_alloc( - clobj_t _ctx, cl_mem_flags flags, size_t size, cl_uint alignment, - void **result); -error* svm_free(clobj_t _ctx, void *svm_pointer); 
-error* enqueue_svm_free( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - void *svm_pointers[], - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_memcpy( - clobj_t *evt, clobj_t _queue, - cl_bool is_blocking, - void *dst_ptr, const void *src_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, - void *pyobj); -error* enqueue_svm_memfill( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const void *pattern, size_t pattern_size, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_map( - clobj_t *evt, clobj_t _queue, - cl_bool blocking_map, cl_map_flags map_flags, - void *svm_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_unmap( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_migrate_mem( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - const void **svm_pointers, - const size_t *sizes, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ program - -error *create_program_with_source(clobj_t *program, clobj_t context, - const char *src); -error* create_program_with_il(clobj_t *prog, clobj_t _ctx, void *il, size_t length); -error *create_program_with_binary(clobj_t *program, clobj_t context, - cl_uint num_devices, const clobj_t *devices, - const unsigned char **binaries, - size_t *binary_sizes); -error *program__build(clobj_t program, const char *options, - cl_uint num_devices, const clobj_t *devices); -error *program__kind(clobj_t program, int *kind); -error *program__get_build_info(clobj_t program, clobj_t device, - cl_program_build_info param, generic_info *out); -error *program__create_with_builtin_kernels(clobj_t *_prg, clobj_t _ctx, - const clobj_t *_devs, - uint32_t num_devs, - const char *names); -error *program__compile(clobj_t _prg, const char *opts, const clobj_t *_devs, - size_t num_devs, const 
clobj_t *_prgs, - const char *const *names, size_t num_hdrs); -error *program__link(clobj_t *_prg, clobj_t _ctx, const clobj_t *_prgs, - size_t num_prgs, const char *opts, - const clobj_t *_devs, size_t num_devs); -error *program__all_kernels(clobj_t _prg, clobj_t **_knl, uint32_t *size); - -// }}} - -// {{{ sampler - -error *create_sampler(clobj_t *sampler, clobj_t context, int norm_coords, - cl_addressing_mode am, cl_filter_mode fm); - -// }}} - -// {{{ kernel - -error *create_kernel(clobj_t *kernel, clobj_t program, const char *name); -error *kernel__set_arg_null(clobj_t kernel, cl_uint arg_index); -error *kernel__set_arg_mem(clobj_t kernel, cl_uint arg_index, clobj_t mem); -error *kernel__set_arg_sampler(clobj_t kernel, cl_uint arg_index, - clobj_t sampler); -error *kernel__set_arg_buf(clobj_t kernel, cl_uint arg_index, - const void *buffer, size_t size); -error *kernel__set_arg_svm_pointer(clobj_t kernel, cl_uint arg_index, void *value); -error *kernel__get_work_group_info(clobj_t kernel, - cl_kernel_work_group_info param, - clobj_t device, generic_info *out); -error *kernel__get_arg_info(clobj_t _knl, cl_uint idx, - cl_kernel_arg_info param, generic_info *out); - -// }}} - -// {{{ image -error *create_image_2d(clobj_t *image, clobj_t context, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t pitch, void *buffer); -error *create_image_3d(clobj_t *image, clobj_t context, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t depth, size_t pitch_x, size_t pitch_y, - void *buffer); -error *create_image_from_desc(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, cl_image_desc *desc, - void *buffer); -error *image__get_image_info(clobj_t img, cl_image_info param, - generic_info *out); -type_t image__get_fill_type(clobj_t img); -// }}} - -// {{{ event - -error *event__get_profiling_info(clobj_t event, cl_profiling_info param, - generic_info *out); -error *event__wait(clobj_t event); 
-error *event__set_callback(clobj_t _evt, cl_int type, void *pyobj); -error *wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ nanny event - -void *nanny_event__get_ward(clobj_t evt); - -// }}} - -// {{{ user event - -error *create_user_event(clobj_t *_evt, clobj_t _ctx); -error *user_event__set_status(clobj_t _evt, cl_int status); - -// }}} - -// {{{ enqueue_* -error *enqueue_nd_range_kernel(clobj_t *event, clobj_t queue, - clobj_t kernel, cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - const clobj_t *wait_for, uint32_t num_wait_for); -error *enqueue_task(clobj_t *_evt, clobj_t _queue, clobj_t _knl, - const clobj_t *_wait_for, uint32_t num_wait_for); - -error *enqueue_marker_with_wait_list(clobj_t *event, clobj_t queue, - const clobj_t *wait_for, - uint32_t num_wait_for); -error *enqueue_barrier_with_wait_list(clobj_t *event, clobj_t queue, - const clobj_t *wait_for, - uint32_t num_wait_for); -error *enqueue_wait_for_events(clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_marker(clobj_t *event, clobj_t queue); -error *enqueue_barrier(clobj_t queue); -error *enqueue_migrate_mem_objects(clobj_t *evt, clobj_t _queue, - const clobj_t *_mem_obj, uint32_t, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ enqueue_*_buffer* - -error *enqueue_read_buffer(clobj_t *event, clobj_t queue, clobj_t mem, - void *buffer, size_t size, size_t device_offset, - const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void *pyobj); -error *enqueue_copy_buffer(clobj_t *event, clobj_t queue, clobj_t src, - clobj_t dst, ptrdiff_t byte_count, - size_t src_offset, size_t dst_offset, - const clobj_t *wait_for, uint32_t num_wait_for); -error *enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem, - const void *buffer, size_t size, - size_t device_offset, const clobj_t 
*wait_for, - uint32_t num_wait_for, int is_blocking, - void *pyobj); -error *enqueue_map_buffer(clobj_t *_evt, clobj_t *mpa, clobj_t _queue, - clobj_t _mem, cl_map_flags flags, size_t offset, - size_t size, const clobj_t *_wait_for, - uint32_t num_wait_for, int block); -error *enqueue_fill_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, - void *pattern, size_t psize, size_t offset, - size_t size, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_read_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buf, const size_t *_buf_orig, - size_t buf_orig_l, const size_t *_host_orig, - size_t host_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_buf_pitches, - size_t buf_pitches_l, - const size_t *_host_pitches, - size_t host_pitches_l, const clobj_t *_wait_for, - uint32_t num_wait_for, int block, void *pyobj); -error *enqueue_write_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buf, const size_t *_buf_orig, - size_t buf_orig_l, const size_t *_host_orig, - size_t host_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_buf_pitches, - size_t buf_pitches_l, - const size_t *_host_pitches, - size_t host_pitches_l, - const clobj_t *_wait_for, - uint32_t num_wait_for, int block, void *pyobj); -error *enqueue_copy_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_orig, - size_t src_orig_l, const size_t *_dst_orig, - size_t dst_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_src_pitches, - size_t src_pitches_l, - const size_t *_dst_pitches, - size_t dst_pitches_l, const clobj_t *_wait_for, - uint32_t num_wait_for); - -// }}} - -// {{{ enqueue_*_image* - -error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem, - const size_t *origin, size_t origin_l, - const size_t *region, size_t region_l, - void *buffer, size_t row_pitch, size_t slice_pitch, - const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void *pyobj); -error 
*enqueue_copy_image(clobj_t *_evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_origin, - size_t src_origin_l, const size_t *_dst_origin, - size_t dst_origin_l, const size_t *_region, - size_t region_l, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, - const size_t *origin, size_t origin_l, - const size_t *region, size_t region_l, - const void *buffer, size_t row_pitch, - size_t slice_pitch, const clobj_t *_wait_for, - uint32_t num_wait_for, int is_blocking, - void *pyobj); -error *enqueue_map_image(clobj_t *_evt, clobj_t *map, clobj_t _queue, - clobj_t _mem, cl_map_flags flags, - const size_t *_origin, size_t origin_l, - const size_t *_region, size_t region_l, - size_t *row_pitch, size_t *slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block); -error *enqueue_fill_image(clobj_t *evt, clobj_t _queue, clobj_t mem, - const void *color, const size_t *_origin, - size_t origin_l, const size_t *_region, - size_t region_l, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_copy_image_to_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_orig, size_t, - const size_t *_reg, size_t, size_t offset, - const clobj_t *_wait_for, uint32_t); -error *enqueue_copy_buffer_to_image(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, size_t offset, - const size_t *_orig, size_t, - const size_t *_reg, size_t, - const clobj_t *_wait_for, uint32_t); - -// }}} - -// {{{ cl object - -intptr_t clobj__int_ptr(clobj_t obj); -error *clobj__get_info(clobj_t obj, cl_uint param, generic_info *out); -void clobj__delete(clobj_t obj); -error *clobj__from_int_ptr(clobj_t *out, intptr_t ptr, class_t, int); - -// }}} - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl_gl_core.h b/src/c_wrapper/wrap_cl_gl_core.h deleted file mode 100644 index 606d7c1d..00000000 --- a/src/c_wrapper/wrap_cl_gl_core.h +++ /dev/null @@ -1,18 
+0,0 @@ -// Interface between C and Python for GL related functions - -error* create_from_gl_texture(clobj_t *ptr, clobj_t _ctx, cl_mem_flags flags, - GLenum texture_target, GLint miplevel, - GLuint texture); -error *create_from_gl_buffer(clobj_t *ptr, clobj_t context, - cl_mem_flags flags, GLuint bufobj); -error *create_from_gl_renderbuffer(clobj_t *ptr, clobj_t context, - cl_mem_flags flags, GLuint bufobj); -error *enqueue_acquire_gl_objects( - clobj_t *event, clobj_t queue, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for); -error *enqueue_release_gl_objects( - clobj_t *event, clobj_t queue, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for); -cl_context_properties get_apple_cgl_share_group(); -error *get_gl_object_info(clobj_t mem, cl_gl_object_type *otype, - GLuint *gl_name); diff --git a/src/c_wrapper/wrap_constants.cpp b/src/c_wrapper/wrap_constants.cpp deleted file mode 100644 index 08ed2ede..00000000 --- a/src/c_wrapper/wrap_constants.cpp +++ /dev/null @@ -1,827 +0,0 @@ -#include "wrap_cl.h" -#include - -#ifdef CONST -#undef CONST -#endif - -extern "C" -void populate_constants(void(*add)(const char*, const char*, int64_t value)) -{ -#define _ADD_ATTR(TYPE, PREFIX, NAME, SUFFIX, ...) \ - add(TYPE, #NAME, CL_##PREFIX##NAME##SUFFIX) -#define ADD_ATTR(TYPE, PREFIX, NAME, ...) 
\ - _ADD_ATTR(TYPE, PREFIX, NAME, __VA_ARGS__) - - // program_kind - add("program_kind", "UNKNOWN", KND_UNKNOWN); - add("program_kind", "SOURCE", KND_SOURCE); - add("program_kind", "BINARY", KND_BINARY); - - // status_code - ADD_ATTR("status_code", , SUCCESS); - ADD_ATTR("status_code", , DEVICE_NOT_FOUND); - ADD_ATTR("status_code", , DEVICE_NOT_AVAILABLE); -#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - ADD_ATTR("status_code", , COMPILER_NOT_AVAILABLE); -#endif - ADD_ATTR("status_code", , MEM_OBJECT_ALLOCATION_FAILURE); - ADD_ATTR("status_code", , OUT_OF_RESOURCES); - ADD_ATTR("status_code", , OUT_OF_HOST_MEMORY); - ADD_ATTR("status_code", , PROFILING_INFO_NOT_AVAILABLE); - ADD_ATTR("status_code", , MEM_COPY_OVERLAP); - ADD_ATTR("status_code", , IMAGE_FORMAT_MISMATCH); - ADD_ATTR("status_code", , IMAGE_FORMAT_NOT_SUPPORTED); - ADD_ATTR("status_code", , BUILD_PROGRAM_FAILURE); - ADD_ATTR("status_code", , MAP_FAILURE); - - ADD_ATTR("status_code", , INVALID_VALUE); - ADD_ATTR("status_code", , INVALID_DEVICE_TYPE); - ADD_ATTR("status_code", , INVALID_PLATFORM); - ADD_ATTR("status_code", , INVALID_DEVICE); - ADD_ATTR("status_code", , INVALID_CONTEXT); - ADD_ATTR("status_code", , INVALID_QUEUE_PROPERTIES); - ADD_ATTR("status_code", , INVALID_COMMAND_QUEUE); - ADD_ATTR("status_code", , INVALID_HOST_PTR); - ADD_ATTR("status_code", , INVALID_MEM_OBJECT); - ADD_ATTR("status_code", , INVALID_IMAGE_FORMAT_DESCRIPTOR); - ADD_ATTR("status_code", , INVALID_IMAGE_SIZE); - ADD_ATTR("status_code", , INVALID_SAMPLER); - ADD_ATTR("status_code", , INVALID_BINARY); - ADD_ATTR("status_code", , INVALID_BUILD_OPTIONS); - ADD_ATTR("status_code", , INVALID_PROGRAM); - ADD_ATTR("status_code", , INVALID_PROGRAM_EXECUTABLE); - ADD_ATTR("status_code", , INVALID_KERNEL_NAME); - ADD_ATTR("status_code", , INVALID_KERNEL_DEFINITION); - ADD_ATTR("status_code", , INVALID_KERNEL); - ADD_ATTR("status_code", , INVALID_ARG_INDEX); - ADD_ATTR("status_code", , INVALID_ARG_VALUE); - 
ADD_ATTR("status_code", , INVALID_ARG_SIZE); - ADD_ATTR("status_code", , INVALID_KERNEL_ARGS); - ADD_ATTR("status_code", , INVALID_WORK_DIMENSION); - ADD_ATTR("status_code", , INVALID_WORK_GROUP_SIZE); - ADD_ATTR("status_code", , INVALID_WORK_ITEM_SIZE); - ADD_ATTR("status_code", , INVALID_GLOBAL_OFFSET); - ADD_ATTR("status_code", , INVALID_EVENT_WAIT_LIST); - ADD_ATTR("status_code", , INVALID_EVENT); - ADD_ATTR("status_code", , INVALID_OPERATION); - ADD_ATTR("status_code", , INVALID_GL_OBJECT); - ADD_ATTR("status_code", , INVALID_BUFFER_SIZE); - ADD_ATTR("status_code", , INVALID_MIP_LEVEL); - -#if defined(cl_khr_icd) && (cl_khr_icd >= 1) - ADD_ATTR("status_code", , PLATFORM_NOT_FOUND_KHR); -#endif - -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("status_code", , INVALID_GL_SHAREGROUP_REFERENCE_KHR); -#endif - -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("status_code", , MISALIGNED_SUB_BUFFER_OFFSET); - ADD_ATTR("status_code", , EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); - ADD_ATTR("status_code", , INVALID_GLOBAL_WORK_SIZE); -#endif - -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("status_code", , COMPILE_PROGRAM_FAILURE); - ADD_ATTR("status_code", , LINKER_NOT_AVAILABLE); - ADD_ATTR("status_code", , LINK_PROGRAM_FAILURE); - ADD_ATTR("status_code", , DEVICE_PARTITION_FAILED); - ADD_ATTR("status_code", , KERNEL_ARG_INFO_NOT_AVAILABLE); - ADD_ATTR("status_code", , INVALID_IMAGE_DESCRIPTOR); - ADD_ATTR("status_code", , INVALID_COMPILER_OPTIONS); - ADD_ATTR("status_code", , INVALID_LINKER_OPTIONS); - ADD_ATTR("status_code", , INVALID_DEVICE_PARTITION_COUNT); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("status_code", , INVALID_PIPE_SIZE); - ADD_ATTR("status_code", , INVALID_DEVICE_QUEUE); -#endif - - // platform_info - ADD_ATTR("platform_info", PLATFORM_, PROFILE); - ADD_ATTR("platform_info", PLATFORM_, VERSION); - ADD_ATTR("platform_info", PLATFORM_, NAME); - ADD_ATTR("platform_info", PLATFORM_, VENDOR); -#if 
!(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - ADD_ATTR("platform_info", PLATFORM_, EXTENSIONS); -#endif - - - // device_type - ADD_ATTR("device_type", DEVICE_TYPE_, DEFAULT); - ADD_ATTR("device_type", DEVICE_TYPE_, CPU); - ADD_ATTR("device_type", DEVICE_TYPE_, GPU); - ADD_ATTR("device_type", DEVICE_TYPE_, ACCELERATOR); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_type", DEVICE_TYPE_, CUSTOM); -#endif - ADD_ATTR("device_type", DEVICE_TYPE_, ALL); - - - // device_info - ADD_ATTR("device_info", DEVICE_, TYPE); - ADD_ATTR("device_info", DEVICE_, VENDOR_ID); - ADD_ATTR("device_info", DEVICE_, MAX_COMPUTE_UNITS); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_ITEM_DIMENSIONS); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_GROUP_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_ITEM_SIZES); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_CHAR); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_SHORT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_INT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_LONG); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_FLOAT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_DOUBLE); - ADD_ATTR("device_info", DEVICE_, MAX_CLOCK_FREQUENCY); - ADD_ATTR("device_info", DEVICE_, ADDRESS_BITS); - ADD_ATTR("device_info", DEVICE_, MAX_READ_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_WRITE_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_MEM_ALLOC_SIZE); - ADD_ATTR("device_info", DEVICE_, IMAGE2D_MAX_WIDTH); - ADD_ATTR("device_info", DEVICE_, IMAGE2D_MAX_HEIGHT); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_WIDTH); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_HEIGHT); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_DEPTH); - ADD_ATTR("device_info", DEVICE_, IMAGE_SUPPORT); - ADD_ATTR("device_info", DEVICE_, MAX_PARAMETER_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_SAMPLERS); - ADD_ATTR("device_info", DEVICE_, MEM_BASE_ADDR_ALIGN); - 
ADD_ATTR("device_info", DEVICE_, MIN_DATA_TYPE_ALIGN_SIZE); - ADD_ATTR("device_info", DEVICE_, SINGLE_FP_CONFIG); -#ifdef CL_DEVICE_DOUBLE_FP_CONFIG - ADD_ATTR("device_info", DEVICE_, DOUBLE_FP_CONFIG); -#endif -#ifdef CL_DEVICE_HALF_FP_CONFIG - ADD_ATTR("device_info", DEVICE_, HALF_FP_CONFIG); -#endif - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHE_TYPE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHELINE_SIZE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHE_SIZE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_CONSTANT_BUFFER_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_CONSTANT_ARGS); - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_TYPE); - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_SIZE); - ADD_ATTR("device_info", DEVICE_, ERROR_CORRECTION_SUPPORT); - ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_RESOLUTION); - ADD_ATTR("device_info", DEVICE_, ENDIAN_LITTLE); - ADD_ATTR("device_info", DEVICE_, AVAILABLE); - ADD_ATTR("device_info", DEVICE_, COMPILER_AVAILABLE); - ADD_ATTR("device_info", DEVICE_, EXECUTION_CAPABILITIES); - ADD_ATTR("device_info", DEVICE_, QUEUE_PROPERTIES); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_HOST_PROPERTIES); -#endif - ADD_ATTR("device_info", DEVICE_, NAME); - ADD_ATTR("device_info", DEVICE_, VENDOR); - ADD_ATTR("device_info", , DRIVER_VERSION); - ADD_ATTR("device_info", DEVICE_, VERSION); - ADD_ATTR("device_info", DEVICE_, PROFILE); - ADD_ATTR("device_info", DEVICE_, EXTENSIONS); - ADD_ATTR("device_info", DEVICE_, PLATFORM); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_HALF); - ADD_ATTR("device_info", DEVICE_, HOST_UNIFIED_MEMORY); // deprecated in 2.0 - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_CHAR); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_SHORT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_INT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_LONG); 
- ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_FLOAT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_DOUBLE); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_HALF); - ADD_ATTR("device_info", DEVICE_, OPENCL_C_VERSION); -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - ADD_ATTR("device_info", DEVICE_, COMPUTE_CAPABILITY_MAJOR_NV); - ADD_ATTR("device_info", DEVICE_, COMPUTE_CAPABILITY_MINOR_NV); - ADD_ATTR("device_info", DEVICE_, REGISTERS_PER_BLOCK_NV); - ADD_ATTR("device_info", DEVICE_, WARP_SIZE_NV); - ADD_ATTR("device_info", DEVICE_, GPU_OVERLAP_NV); - ADD_ATTR("device_info", DEVICE_, KERNEL_EXEC_TIMEOUT_NV); - ADD_ATTR("device_info", DEVICE_, INTEGRATED_MEMORY_NV); - // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h -#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV - ADD_ATTR("device_info", DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV); -#endif -#ifdef CL_DEVICE_PCI_BUS_ID_NV - ADD_ATTR("device_info", DEVICE_, PCI_BUS_ID_NV); -#endif -#ifdef CL_DEVICE_PCI_SLOT_ID_NV - ADD_ATTR("device_info", DEVICE_, PCI_SLOT_ID_NV); -#endif -#endif -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD - ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_OFFSET_AMD); -#endif -#ifdef CL_DEVICE_TOPOLOGY_AMD - ADD_ATTR("device_info", DEVICE_, TOPOLOGY_AMD); -#endif -#ifdef CL_DEVICE_BOARD_NAME_AMD - ADD_ATTR("device_info", DEVICE_, BOARD_NAME_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_FREE_MEMORY_AMD); -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_PER_COMPUTE_UNIT_AMD); -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_INSTRUCTION_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, WAVEFRONT_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD - 
ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNELS_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNEL_BANKS_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD); -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_BANKS_AMD); -#endif - -#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD - ADD_ATTR("device_info", DEVICE_, THREAD_TRACE_SUPPORTED_AMD); -#endif -#ifdef CL_DEVICE_GFXIP_MAJOR_AMD - ADD_ATTR("device_info", DEVICE_, GFXIP_MAJOR_AMD); -#endif -#ifdef CL_DEVICE_GFXIP_MINOR_AMD - ADD_ATTR("device_info", DEVICE_, GFXIP_MINOR_AMD); -#endif -#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD - ADD_ATTR("device_info", DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD); -#endif - -#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT - ADD_ATTR("device_info", DEVICE_, MAX_ATOMIC_COUNTERS_EXT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_info", DEVICE_, LINKER_AVAILABLE); - ADD_ATTR("device_info", DEVICE_, BUILT_IN_KERNELS); - ADD_ATTR("device_info", DEVICE_, IMAGE_MAX_BUFFER_SIZE); - ADD_ATTR("device_info", DEVICE_, IMAGE_MAX_ARRAY_SIZE); - ADD_ATTR("device_info", DEVICE_, PARENT_DEVICE); - ADD_ATTR("device_info", DEVICE_, PARTITION_MAX_SUB_DEVICES); - ADD_ATTR("device_info", DEVICE_, PARTITION_PROPERTIES); - ADD_ATTR("device_info", DEVICE_, PARTITION_AFFINITY_DOMAIN); - ADD_ATTR("device_info", DEVICE_, PARTITION_TYPE); - ADD_ATTR("device_info", DEVICE_, REFERENCE_COUNT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_INTEROP_USER_SYNC); - ADD_ATTR("device_info", DEVICE_, PRINTF_BUFFER_SIZE); -#endif -#ifdef cl_khr_image2d_from_buffer - ADD_ATTR("device_info", DEVICE_, IMAGE_PITCH_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, IMAGE_BASE_ADDRESS_ALIGNMENT); -#endif -#if 
PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("device_info", DEVICE_, MAX_READ_WRITE_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_GLOBAL_VARIABLE_SIZE); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_PROPERTIES); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_PREFERRED_SIZE); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_MAX_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_ON_DEVICE_QUEUES); - ADD_ATTR("device_info", DEVICE_, MAX_ON_DEVICE_EVENTS); - ADD_ATTR("device_info", DEVICE_, SVM_CAPABILITIES); - ADD_ATTR("device_info", DEVICE_, GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_PIPE_ARGS); - ADD_ATTR("device_info", DEVICE_, PIPE_MAX_ACTIVE_RESERVATIONS); - ADD_ATTR("device_info", DEVICE_, PIPE_MAX_PACKET_SIZE); - ADD_ATTR("device_info", DEVICE_, PREFERRED_PLATFORM_ATOMIC_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_GLOBAL_ATOMIC_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_LOCAL_ATOMIC_ALIGNMENT); -#endif -#if PYOPENCL_CL_VERSION >= 0x2010 - ADD_ATTR("device_info", DEVICE_, IL_VERSION); - ADD_ATTR("device_info", DEVICE_, MAX_NUM_SUB_GROUPS); - ADD_ATTR("device_info", DEVICE_, SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS); -#endif - /* cl_intel_advanced_motion_estimation */ -#ifdef CL_DEVICE_ME_VERSION_INTEL - ADD_ATTR("device_info", DEVICE_, ME_VERSION_INTEL); -#endif - - /* cl_qcom_ext_host_ptr */ -#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM - ADD_ATTR("device_info", DEVICE_, EXT_MEM_PADDING_IN_BYTES_QCOM); -#endif -#ifdef CL_DEVICE_PAGE_SIZE_QCOM - ADD_ATTR("device_info", DEVICE_, PAGE_SIZE_QCOM); -#endif - - /* cl_khr_spir */ -#ifdef CL_DEVICE_SPIR_VERSIONS - ADD_ATTR("device_info", DEVICE_, SPIR_VERSIONS); -#endif - - /* cl_altera_device_temperature */ -#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA - ADD_ATTR("device_info", DEVICE_, CORE_TEMPERATURE_ALTERA); -#endif - - /* cl_intel_simultaneous_sharing */ -#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL - ADD_ATTR("device_info", DEVICE_, 
SIMULTANEOUS_INTEROPS_INTEL); -#endif -#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL - ADD_ATTR("device_info", DEVICE_, NUM_SIMULTANEOUS_INTEROPS_INTEL); -#endif - - // device_fp_config - ADD_ATTR("device_fp_config", FP_, DENORM); - ADD_ATTR("device_fp_config", FP_, INF_NAN); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_NEAREST); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_ZERO); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_INF); - ADD_ATTR("device_fp_config", FP_, FMA); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("device_fp_config", FP_, SOFT_FLOAT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_fp_config", FP_, CORRECTLY_ROUNDED_DIVIDE_SQRT); -#endif - - - // device_mem_cache_type - ADD_ATTR("device_mem_cache_type", , NONE); - ADD_ATTR("device_mem_cache_type", , READ_ONLY_CACHE); - ADD_ATTR("device_mem_cache_type", , READ_WRITE_CACHE); - - - // device_local_mem_type - ADD_ATTR("device_local_mem_type", , LOCAL); - ADD_ATTR("device_local_mem_type", , GLOBAL); - - - // device_exec_capabilities - ADD_ATTR("device_exec_capabilities", EXEC_, KERNEL); - ADD_ATTR("device_exec_capabilities", EXEC_, NATIVE_KERNEL); -#ifdef CL_EXEC_IMMEDIATE_EXECUTION_INTEL - ADD_ATTR("device_exec_capabilities", EXEC_, IMMEDIATE_EXECUTION_INTEL); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - // device_svm_capabilities - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, COARSE_GRAIN_BUFFER); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, FINE_GRAIN_BUFFER); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, FINE_GRAIN_SYSTEM); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, ATOMICS); -#endif - - - // command_queue_properties - ADD_ATTR("command_queue_properties", QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); - ADD_ATTR("command_queue_properties", QUEUE_, PROFILING_ENABLE); -#ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL - ADD_ATTR("command_queue_properties", QUEUE_, IMMEDIATE_EXECUTION_ENABLE_INTEL); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - 
ADD_ATTR("command_queue_properties", QUEUE_, ON_DEVICE); - ADD_ATTR("command_queue_properties", QUEUE_, ON_DEVICE_DEFAULT); -#endif - - - // context_info - ADD_ATTR("context_info", CONTEXT_, REFERENCE_COUNT); - ADD_ATTR("context_info", CONTEXT_, DEVICES); - ADD_ATTR("context_info", CONTEXT_, PROPERTIES); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("context_info", CONTEXT_, NUM_DEVICES); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("context_info", CONTEXT_, INTEROP_USER_SYNC); -#endif - - - // gl_context_info -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("gl_context_info", , CURRENT_DEVICE_FOR_GL_CONTEXT_KHR); - ADD_ATTR("gl_context_info", , DEVICES_FOR_GL_CONTEXT_KHR); -#endif - - - // context_properties - ADD_ATTR("context_properties", CONTEXT_, PLATFORM); -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("context_properties", ,GL_CONTEXT_KHR); - ADD_ATTR("context_properties", ,EGL_DISPLAY_KHR); - ADD_ATTR("context_properties", ,GLX_DISPLAY_KHR); - ADD_ATTR("context_properties", ,WGL_HDC_KHR); - ADD_ATTR("context_properties", ,CGL_SHAREGROUP_KHR); -#endif -#if defined(__APPLE__) && defined(HAVE_GL) && !defined(PYOPENCL_APPLE_USE_CL_H) - ADD_ATTR("context_properties", ,CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE); -#endif /* __APPLE__ */ -#ifdef CL_CONTEXT_OFFLINE_DEVICES_AMD - ADD_ATTR("context_properties", CONTEXT_, OFFLINE_DEVICES_AMD); -#endif - - - // command_queue_info - ADD_ATTR("command_queue_info", QUEUE_, CONTEXT); - ADD_ATTR("command_queue_info", QUEUE_, DEVICE); - ADD_ATTR("command_queue_info", QUEUE_, REFERENCE_COUNT); - ADD_ATTR("command_queue_info", QUEUE_, PROPERTIES); - - - // queue_properties -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("queue_properties", QUEUE_, PROPERTIES); - ADD_ATTR("queue_properties", QUEUE_, SIZE); -#endif - - - // mem_flags - ADD_ATTR("mem_flags", MEM_, READ_WRITE); - ADD_ATTR("mem_flags", MEM_, WRITE_ONLY); - ADD_ATTR("mem_flags", MEM_, READ_ONLY); - 
ADD_ATTR("mem_flags", MEM_, USE_HOST_PTR); - ADD_ATTR("mem_flags", MEM_, ALLOC_HOST_PTR); - ADD_ATTR("mem_flags", MEM_, COPY_HOST_PTR); -#ifdef cl_amd_device_memory_flags - ADD_ATTR("mem_flags", MEM_, USE_PERSISTENT_MEM_AMD); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_flags", MEM_, HOST_WRITE_ONLY); - ADD_ATTR("mem_flags", MEM_, HOST_READ_ONLY); - ADD_ATTR("mem_flags", MEM_, HOST_NO_ACCESS); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_flags", MEM_, KERNEL_READ_AND_WRITE); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("svm_mem_flags", MEM_, READ_WRITE); - ADD_ATTR("svm_mem_flags", MEM_, WRITE_ONLY); - ADD_ATTR("svm_mem_flags", MEM_, READ_ONLY); - ADD_ATTR("svm_mem_flags", MEM_, SVM_FINE_GRAIN_BUFFER); - ADD_ATTR("svm_mem_flags", MEM_, SVM_ATOMICS); -#endif - - - // channel_order - ADD_ATTR("channel_order", , R); - ADD_ATTR("channel_order", , A); - ADD_ATTR("channel_order", , RG); - ADD_ATTR("channel_order", , RA); - ADD_ATTR("channel_order", , RGB); - ADD_ATTR("channel_order", , RGBA); - ADD_ATTR("channel_order", , BGRA); - ADD_ATTR("channel_order", , INTENSITY); - ADD_ATTR("channel_order", , LUMINANCE); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("channel_order", , Rx); - ADD_ATTR("channel_order", , RGx); - ADD_ATTR("channel_order", , RGBx); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("channel_order", , sRGB); - ADD_ATTR("channel_order", , sRGBx); - ADD_ATTR("channel_order", , sRGBA); - ADD_ATTR("channel_order", , sBGRA); - ADD_ATTR("channel_order", , ABGR); -#endif - - - // channel_type - ADD_ATTR("channel_type", , SNORM_INT8); - ADD_ATTR("channel_type", , SNORM_INT16); - ADD_ATTR("channel_type", , UNORM_INT8); - ADD_ATTR("channel_type", , UNORM_INT16); - ADD_ATTR("channel_type", , UNORM_SHORT_565); - ADD_ATTR("channel_type", , UNORM_SHORT_555); - ADD_ATTR("channel_type", , UNORM_INT_101010); - ADD_ATTR("channel_type", , SIGNED_INT8); - ADD_ATTR("channel_type", , SIGNED_INT16); - ADD_ATTR("channel_type", , 
SIGNED_INT32); - ADD_ATTR("channel_type", , UNSIGNED_INT8); - ADD_ATTR("channel_type", , UNSIGNED_INT16); - ADD_ATTR("channel_type", , UNSIGNED_INT32); - ADD_ATTR("channel_type", , HALF_FLOAT); - ADD_ATTR("channel_type", , FLOAT); - - - // mem_object_type - ADD_ATTR("mem_object_type", MEM_OBJECT_, BUFFER); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE2D); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE3D); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE2D_ARRAY); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D_ARRAY); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D_BUFFER); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_object_type", MEM_OBJECT_, PIPE); -#endif - - - // mem_info - ADD_ATTR("mem_info", MEM_, TYPE); - ADD_ATTR("mem_info", MEM_, FLAGS); - ADD_ATTR("mem_info", MEM_, SIZE); - ADD_ATTR("mem_info", MEM_, HOST_PTR); - ADD_ATTR("mem_info", MEM_, MAP_COUNT); - ADD_ATTR("mem_info", MEM_, REFERENCE_COUNT); - ADD_ATTR("mem_info", MEM_, CONTEXT); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("mem_info", MEM_, ASSOCIATED_MEMOBJECT); - ADD_ATTR("mem_info", MEM_, OFFSET); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_info", MEM_, USES_SVM_POINTER); -#endif - - - // image_info - ADD_ATTR("image_info", IMAGE_, FORMAT); - ADD_ATTR("image_info", IMAGE_, ELEMENT_SIZE); - ADD_ATTR("image_info", IMAGE_, ROW_PITCH); - ADD_ATTR("image_info", IMAGE_, SLICE_PITCH); - ADD_ATTR("image_info", IMAGE_, WIDTH); - ADD_ATTR("image_info", IMAGE_, HEIGHT); - ADD_ATTR("image_info", IMAGE_, DEPTH); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("image_info", IMAGE_, ARRAY_SIZE); - ADD_ATTR("image_info", IMAGE_, BUFFER); - ADD_ATTR("image_info", IMAGE_, NUM_MIP_LEVELS); - ADD_ATTR("image_info", IMAGE_, NUM_SAMPLES); -#endif - - - // addressing_mode - ADD_ATTR("addressing_mode", ADDRESS_, NONE); - ADD_ATTR("addressing_mode", ADDRESS_, CLAMP_TO_EDGE); - 
ADD_ATTR("addressing_mode", ADDRESS_, CLAMP); - ADD_ATTR("addressing_mode", ADDRESS_, REPEAT); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("addressing_mode", ADDRESS_, MIRRORED_REPEAT); -#endif - - - // filter_mode - ADD_ATTR("filter_mode", FILTER_, NEAREST); - ADD_ATTR("filter_mode", FILTER_, LINEAR); - - - // sampler_info - ADD_ATTR("sampler_info", SAMPLER_, REFERENCE_COUNT); - ADD_ATTR("sampler_info", SAMPLER_, CONTEXT); - ADD_ATTR("sampler_info", SAMPLER_, NORMALIZED_COORDS); - ADD_ATTR("sampler_info", SAMPLER_, ADDRESSING_MODE); - ADD_ATTR("sampler_info", SAMPLER_, FILTER_MODE); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("sampler_info", SAMPLER_, MIP_FILTER_MODE); - ADD_ATTR("sampler_info", SAMPLER_, LOD_MIN); - ADD_ATTR("sampler_info", SAMPLER_, LOD_MAX); -#endif - - - // map_flags - ADD_ATTR("map_flags", MAP_, READ); - ADD_ATTR("map_flags", MAP_, WRITE); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("map_flags", MAP_, WRITE_INVALIDATE_REGION); -#endif - - - // program_info - ADD_ATTR("program_info", PROGRAM_, REFERENCE_COUNT); - ADD_ATTR("program_info", PROGRAM_, CONTEXT); - ADD_ATTR("program_info", PROGRAM_, NUM_DEVICES); - ADD_ATTR("program_info", PROGRAM_, DEVICES); - ADD_ATTR("program_info", PROGRAM_, SOURCE); - ADD_ATTR("program_info", PROGRAM_, BINARY_SIZES); - ADD_ATTR("program_info", PROGRAM_, BINARIES); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("program_info", PROGRAM_, NUM_KERNELS); - ADD_ATTR("program_info", PROGRAM_, KERNEL_NAMES); -#endif - - - // program_build_info - ADD_ATTR("program_build_info", PROGRAM_BUILD_, STATUS); - ADD_ATTR("program_build_info", PROGRAM_BUILD_, OPTIONS); - ADD_ATTR("program_build_info", PROGRAM_BUILD_, LOG); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("program_build_info", PROGRAM_, BINARY_TYPE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("program_build_info", PROGRAM_BUILD_, GLOBAL_VARIABLE_TOTAL_SIZE); -#endif - - - // program_binary_type -#if PYOPENCL_CL_VERSION >= 0x1020 - 
ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, NONE); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, COMPILED_OBJECT); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, LIBRARY); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, EXECUTABLE); -#endif - - - // kernel_info - ADD_ATTR("kernel_info", KERNEL_, FUNCTION_NAME); - ADD_ATTR("kernel_info", KERNEL_, NUM_ARGS); - ADD_ATTR("kernel_info", KERNEL_, REFERENCE_COUNT); - ADD_ATTR("kernel_info", KERNEL_, CONTEXT); - ADD_ATTR("kernel_info", KERNEL_, PROGRAM); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_info", KERNEL_, ATTRIBUTES); -#endif - - - // kernel_arg_info -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, ADDRESS_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, ACCESS_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, TYPE_NAME); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, TYPE_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, NAME); -#endif - - - // kernel_arg_address_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, GLOBAL); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, LOCAL); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, CONSTANT); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, PRIVATE); -#endif - - - // kernel_arg_access_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, READ_ONLY); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, WRITE_ONLY); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, READ_WRITE); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, NONE); -#endif - - - // kernel_arg_type_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, NONE); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, CONST); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, 
RESTRICT); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, VOLATILE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, PIPE); -#endif - - - // kernel_work_group_info - ADD_ATTR("kernel_work_group_info", KERNEL_, WORK_GROUP_SIZE); - ADD_ATTR("kernel_work_group_info", KERNEL_, COMPILE_WORK_GROUP_SIZE); - ADD_ATTR("kernel_work_group_info", KERNEL_, LOCAL_MEM_SIZE); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("kernel_work_group_info", KERNEL_, PREFERRED_WORK_GROUP_SIZE_MULTIPLE); - ADD_ATTR("kernel_work_group_info", KERNEL_, PRIVATE_MEM_SIZE); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_work_group_info", KERNEL_, GLOBAL_WORK_SIZE); -#endif - - - // event_info - ADD_ATTR("event_info", EVENT_, COMMAND_QUEUE); - ADD_ATTR("event_info", EVENT_, COMMAND_TYPE); - ADD_ATTR("event_info", EVENT_, REFERENCE_COUNT); - ADD_ATTR("event_info", EVENT_, COMMAND_EXECUTION_STATUS); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("event_info", EVENT_, CONTEXT); -#endif - - - // command_type - ADD_ATTR("command_type", COMMAND_, NDRANGE_KERNEL); - ADD_ATTR("command_type", COMMAND_, TASK); - ADD_ATTR("command_type", COMMAND_, NATIVE_KERNEL); - ADD_ATTR("command_type", COMMAND_, READ_BUFFER); - ADD_ATTR("command_type", COMMAND_, WRITE_BUFFER); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER); - ADD_ATTR("command_type", COMMAND_, READ_IMAGE); - ADD_ATTR("command_type", COMMAND_, WRITE_IMAGE); - ADD_ATTR("command_type", COMMAND_, COPY_IMAGE); - ADD_ATTR("command_type", COMMAND_, COPY_IMAGE_TO_BUFFER); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER_TO_IMAGE); - ADD_ATTR("command_type", COMMAND_, MAP_BUFFER); - ADD_ATTR("command_type", COMMAND_, MAP_IMAGE); - ADD_ATTR("command_type", COMMAND_, UNMAP_MEM_OBJECT); - ADD_ATTR("command_type", COMMAND_, MARKER); - ADD_ATTR("command_type", COMMAND_, ACQUIRE_GL_OBJECTS); - ADD_ATTR("command_type", COMMAND_, RELEASE_GL_OBJECTS); -#if PYOPENCL_CL_VERSION >= 0x1010 - 
ADD_ATTR("command_type", COMMAND_, READ_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, WRITE_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, USER); -#endif -#ifdef cl_ext_migrate_memobject - ADD_ATTR("command_type", COMMAND_, MIGRATE_MEM_OBJECT_EXT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("command_type", COMMAND_, BARRIER); - ADD_ATTR("command_type", COMMAND_, MIGRATE_MEM_OBJECTS); - ADD_ATTR("command_type", COMMAND_, FILL_BUFFER); - ADD_ATTR("command_type", COMMAND_, FILL_IMAGE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("command_type", COMMAND_, SVM_FREE); - ADD_ATTR("command_type", COMMAND_, SVM_MEMCPY); - ADD_ATTR("command_type", COMMAND_, SVM_MEMFILL); - ADD_ATTR("command_type", COMMAND_, SVM_MAP); - ADD_ATTR("command_type", COMMAND_, SVM_UNMAP); -#endif - - - // command_execution_status - ADD_ATTR("command_execution_status", , COMPLETE); - ADD_ATTR("command_execution_status", , RUNNING); - ADD_ATTR("command_execution_status", , SUBMITTED); - ADD_ATTR("command_execution_status", , QUEUED); - - - // profiling_info - ADD_ATTR("profiling_info", PROFILING_COMMAND_, QUEUED); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, SUBMIT); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, START); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, END); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("profiling_info", PROFILING_COMMAND_, COMPLETE); -#endif - - - // mem_migration_flags -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_migration_flags", MIGRATE_MEM_OBJECT_, HOST); - ADD_ATTR("mem_migration_flags", MIGRATE_MEM_OBJECT_, CONTENT_UNDEFINED); -#endif - - - // mem_migration_flags_ext -#ifdef cl_ext_migrate_memobject - ADD_ATTR("mem_migration_flags_ext", MIGRATE_MEM_OBJECT_, HOST, _EXT); - - // As of 2018-07-11, the official headers seem to have dropped this: -#ifdef CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_EXT - ADD_ATTR("mem_migration_flags_ext", MIGRATE_MEM_OBJECT_, - 
CONTENT_UNDEFINED, _EXT); -#endif - -#endif - - - // device_partition_property -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, EQUALLY); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_COUNTS); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_COUNTS_LIST_END); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_AFFINITY_DOMAIN); -#endif - - - // device_affinity_domain -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, NUMA); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L4_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L3_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L2_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L1_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, - NEXT_PARTITIONABLE); -#endif - - -#ifdef HAVE_GL - // gl_object_type - ADD_ATTR("gl_object_type", GL_OBJECT_, BUFFER); - ADD_ATTR("gl_object_type", GL_OBJECT_, TEXTURE2D); - ADD_ATTR("gl_object_type", GL_OBJECT_, TEXTURE3D); - ADD_ATTR("gl_object_type", GL_OBJECT_, RENDERBUFFER); - - - // gl_texture_info - ADD_ATTR("gl_texture_info", GL_, TEXTURE_TARGET); - ADD_ATTR("gl_texture_info", GL_, MIPMAP_LEVEL); -#endif - - - // migrate_mem_object_flags_ext -#ifdef cl_ext_migrate_memobject - ADD_ATTR("migrate_mem_object_flags_ext", MIGRATE_MEM_OBJECT_, HOST, _EXT); -#endif -} -- GitLab From 1f27e27559f576ca45d3755b8481f2b953db40aa Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 6 Aug 2018 12:12:19 -0500 Subject: [PATCH 02/92] Add back old Boost Python source --- pybind11 | 1 + src/bitlog.cpp | 27 + src/bitlog.hpp | 53 + src/mempool.hpp | 376 ++++ src/numpy_init.hpp | 34 + src/tools.hpp | 43 + src/wrap_cl.cpp | 24 + src/wrap_cl.hpp | 4303 ++++++++++++++++++++++++++++++++++++++++ src/wrap_cl_part_1.cpp | 312 +++ src/wrap_cl_part_2.cpp | 359 ++++ 
src/wrap_constants.cpp | 868 ++++++++ src/wrap_helpers.hpp | 175 ++ src/wrap_mempool.cpp | 290 +++ 13 files changed, 6865 insertions(+) create mode 160000 pybind11 create mode 100644 src/bitlog.cpp create mode 100644 src/bitlog.hpp create mode 100644 src/mempool.hpp create mode 100644 src/numpy_init.hpp create mode 100644 src/tools.hpp create mode 100644 src/wrap_cl.cpp create mode 100644 src/wrap_cl.hpp create mode 100644 src/wrap_cl_part_1.cpp create mode 100644 src/wrap_cl_part_2.cpp create mode 100644 src/wrap_constants.cpp create mode 100644 src/wrap_helpers.hpp create mode 100644 src/wrap_mempool.cpp diff --git a/pybind11 b/pybind11 new file mode 160000 index 00000000..f7bc18f5 --- /dev/null +++ b/pybind11 @@ -0,0 +1 @@ +Subproject commit f7bc18f528bb35cd06c93d0a58c17e6eea3fa68c diff --git a/src/bitlog.cpp b/src/bitlog.cpp new file mode 100644 index 00000000..88b820fa --- /dev/null +++ b/src/bitlog.cpp @@ -0,0 +1,27 @@ +#include "bitlog.hpp" + + + + +/* from http://graphics.stanford.edu/~seander/bithacks.html */ +const char pyopencl::log_table_8[] = +{ + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + + diff --git a/src/bitlog.hpp b/src/bitlog.hpp new file mode 100644 index 00000000..405599e7 --- /dev/null +++ 
b/src/bitlog.hpp @@ -0,0 +1,53 @@ +// Base-2 logarithm bithack. + + + + +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP + + + + +#include +#include + + + + +namespace pyopencl +{ + extern const char log_table_8[]; + + inline unsigned bitlog2_16(boost::uint16_t v) + { + if (unsigned long t = v >> 8) + return 8+log_table_8[t]; + else + return log_table_8[v]; + } + + inline unsigned bitlog2_32(boost::uint32_t v) + { + if (boost::uint16_t t = v >> 16) + return 16+bitlog2_16(t); + else + return bitlog2_16(v); + } + + inline unsigned bitlog2(unsigned long v) + { +#if (ULONG_MAX != 4294967295) + if (boost::uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else +#endif + return bitlog2_32(v); + } +} + + + + + +#endif diff --git a/src/mempool.hpp b/src/mempool.hpp new file mode 100644 index 00000000..be88f13f --- /dev/null +++ b/src/mempool.hpp @@ -0,0 +1,376 @@ +// Abstract memory pool implementation + + + + +#ifndef _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP +#define _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP + + + + +#include +#include +#include +#include "bitlog.hpp" + + + + +namespace PYGPU_PACKAGE +{ + template + inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + + + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + + + + template + class memory_pool + { + public: + typedef typename Allocator::pointer_type pointer_type; + typedef typename Allocator::size_type size_type; + + private: + typedef boost::uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef boost::ptr_map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + std::auto_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping 
around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + PyErr_WarnEx(PyExc_UserWarning, "Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. an allocator whose allocations can turn out to " + "be unavailable long after allocation.", 1); + } + } + + virtual ~memory_pool() + { free_held(); } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = bitlog2(size); + size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = signed_left_shift( + (1<second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if 
(bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + throw PYGPU_PACKAGE::error( + "memory_pool::allocate", +#ifdef PYGPU_PYCUDA + CUDA_ERROR_OUT_OF_MEMORY, +#endif +#ifdef PYGPU_PYOPENCL + CL_MEM_OBJECT_ALLOCATION_FAILURE, +#endif + "failed to free memory for allocation"); + } + + void free(pointer_type p, size_type size) + { + --m_active_blocks; + bin_nr_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + BOOST_FOREACH(bin_pair_t bin_pair, m_container) + { + bin_t &bin = *bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + 
{ return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + BOOST_FOREACH(bin_pair_t bin_pair, + // free largest stuff first + std::make_pair(m_container.rbegin(), m_container.rend())) + { + bin_t &bin = *bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + pointer_type get_from_allocator(size_type alloc_sz) + { + pointer_type result = m_allocator->allocate(alloc_sz); + ++m_active_blocks; + + return result; + } + + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; + + + + + + template + class pooled_allocation : public boost::noncopyable + { + public: + typedef Pool pool_type; + typedef typename Pool::pointer_type pointer_type; + typedef typename Pool::size_type size_type; + + private: + boost::shared_ptr m_pool; + + pointer_type m_ptr; + size_type m_size; + bool m_valid; + + public: + pooled_allocation(boost::shared_ptr p, size_type size) + : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) + { } + + ~pooled_allocation() + { + if (m_valid) + free(); + } + + void free() + { + if (m_valid) + { + m_pool->free(m_ptr, m_size); + m_valid = false; + } + else + throw PYGPU_PACKAGE::error( + "pooled_device_allocation::free", +#ifdef PYGPU_PYCUDA + CUDA_ERROR_INVALID_HANDLE +#endif +#ifdef PYGPU_PYOPENCL + CL_INVALID_VALUE +#endif + ); + } + + pointer_type ptr() const + { return m_ptr; } + + size_type size() const + { return m_size; } + }; +} + + + + +#endif diff --git a/src/numpy_init.hpp b/src/numpy_init.hpp new file mode 100644 index 00000000..9d34ac57 --- /dev/null +++ b/src/numpy_init.hpp @@ -0,0 +1,34 @@ +#ifndef _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP +#define _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP + + + + +#include 
+#include + + + + +namespace +{ + static struct pyublas_array_importer + { + static bool do_import_array() + { + import_array1(false); + return true; + } + + pyublas_array_importer() + { + if (!do_import_array()) + throw std::runtime_error("numpy failed to initialize"); + } + } _array_importer; +} + + + + +#endif diff --git a/src/tools.hpp b/src/tools.hpp new file mode 100644 index 00000000..7254ace1 --- /dev/null +++ b/src/tools.hpp @@ -0,0 +1,43 @@ +#ifndef _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP +#define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP + + + + +#include +#include +#include "numpy_init.hpp" + + + + +namespace pyopencl +{ + inline + npy_intp size_from_dims(int ndim, const npy_intp *dims) + { + if (ndim != 0) + return std::accumulate(dims, dims+ndim, 1, std::multiplies()); + else + return 1; + } + + + + + inline void run_python_gc() + { + namespace py = boost::python; + + py::object gc_mod( + py::handle<>( + PyImport_ImportModule("gc"))); + gc_mod.attr("collect")(); + } +} + + + + + +#endif diff --git a/src/wrap_cl.cpp b/src/wrap_cl.cpp new file mode 100644 index 00000000..9f680f2d --- /dev/null +++ b/src/wrap_cl.cpp @@ -0,0 +1,24 @@ +#include "wrap_cl.hpp" + + + + +using namespace pyopencl; + + + + +extern void pyopencl_expose_constants(); +extern void pyopencl_expose_part_1(); +extern void pyopencl_expose_part_2(); +extern void pyopencl_expose_mempool(); + +BOOST_PYTHON_MODULE(_cl) +{ + pyopencl_expose_constants(); + pyopencl_expose_part_1(); + pyopencl_expose_part_2(); + pyopencl_expose_mempool(); +} + +// vim: foldmethod=marker diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp new file mode 100644 index 00000000..6ee2e33b --- /dev/null +++ b/src/wrap_cl.hpp @@ -0,0 +1,4303 @@ +#ifndef _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP +#define _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP + +// CL 1.2 undecided: +// clSetPrintfCallback + +// {{{ includes + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +// #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + 
+#ifdef __APPLE__ + +// Mac ------------------------------------------------------------------------ +#include +#ifdef HAVE_GL + +#define PYOPENCL_GL_SHARING_VERSION 1 + +#include +#include +#include +#endif + +#else + +// elsewhere ------------------------------------------------------------------ +#include +#include + +#if defined(_WIN32) +#define NOMINMAX +#include +#endif + +#ifdef HAVE_GL +#include +#include +#endif + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) +#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing +#endif + +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include "wrap_helpers.hpp" +#include "numpy_init.hpp" +#include "tools.hpp" + +#ifdef PYOPENCL_PRETEND_CL_VERSION +#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION +#else + +#if defined(CL_VERSION_1_2) +#define PYOPENCL_CL_VERSION 0x1020 +#elif defined(CL_VERSION_1_1) +#define PYOPENCL_CL_VERSION 0x1010 +#else +#define PYOPENCL_CL_VERSION 0x1000 +#endif + +#endif + + +#if PY_VERSION_HEX >= 0x03000000 +#define PYOPENCL_USE_NEW_BUFFER_INTERFACE +#endif +// }}} + + + + + +// {{{ tools +#if PY_VERSION_HEX >= 0x02050000 + typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; +#else + typedef int PYOPENCL_BUFFER_SIZE_T; +#endif + +#define PYOPENCL_CAST_BOOL(B) ((B) ? CL_TRUE : CL_FALSE) + + + + + +#define PYOPENCL_DEPRECATED(WHAT, KILL_VERSION, EXTRA_MSG) \ + { \ + PyErr_Warn( \ + PyExc_DeprecationWarning, \ + WHAT " is deprecated and will stop working in PyOpenCL " KILL_VERSION". 
" \ + EXTRA_MSG); \ + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + +#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ + NAME##_fn VAR \ + = (NAME##_fn) \ + clGetExtensionFunctionAddressForPlatform(PLATFORM, #NAME); \ + \ + if (!VAR) \ + throw error(#NAME, CL_INVALID_VALUE, #NAME \ + "not available"); + +#else + +#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ + NAME##_fn VAR \ + = (NAME##_fn) \ + clGetExtensionFunctionAddress(#NAME); \ + \ + if (!VAR) \ + throw error(#NAME, CL_INVALID_VALUE, #NAME \ + "not available"); + +#endif + + +#define PYOPENCL_PARSE_PY_DEVICES \ + std::vector devices_vec; \ + cl_uint num_devices; \ + cl_device_id *devices; \ + \ + if (py_devices.ptr() == Py_None) \ + { \ + num_devices = 0; \ + devices = 0; \ + } \ + else \ + { \ + PYTHON_FOREACH(py_dev, py_devices) \ + devices_vec.push_back( \ + py::extract(py_dev)().data()); \ + num_devices = devices_vec.size(); \ + devices = devices_vec.empty( ) ? NULL : &devices_vec.front(); \ + } \ + + +#define PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(OPERATION) \ + try \ + { \ + OPERATION \ + } \ + catch (pyopencl::error &e) \ + { \ + if (!e.is_out_of_memory()) \ + throw; \ + } \ + \ + /* If we get here, we got an error from CL. + * We should run the Python GC to try and free up + * some memory references. */ \ + run_python_gc(); \ + \ + /* Now retry the allocation. If it fails again, + * let it fail. */ \ + { \ + OPERATION \ + } + + + + +#define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ + { \ + bool failed_with_mem_error = false; \ + try \ + { \ + OPERATION \ + } \ + catch (pyopencl::error &e) \ + { \ + failed_with_mem_error = true; \ + if (!e.is_out_of_memory()) \ + throw; \ + } \ + \ + if (failed_with_mem_error) \ + { \ + /* If we get here, we got an error from CL. + * We should run the Python GC to try and free up + * some memory references. */ \ + run_python_gc(); \ + \ + /* Now retry the allocation. If it fails again, + * let it fail. 
*/ \ + { \ + OPERATION \ + } \ + } \ + } + +// }}} + +// {{{ tracing and error reporting +#ifdef PYOPENCL_TRACE + #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ + std::cerr << NAME << std::endl; + #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \ + std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl; +#else + #define PYOPENCL_PRINT_CALL_TRACE(NAME) /*nothing*/ + #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/ +#endif + +#define PYOPENCL_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ + { \ + PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ + cl_int status_code; \ + Py_BEGIN_ALLOW_THREADS \ + status_code = NAME ARGLIST; \ + Py_END_ALLOW_THREADS \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ + { \ + PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED_THREADED(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + Py_BEGIN_ALLOW_THREADS \ + status_code = NAME ARGLIST; \ + Py_END_ALLOW_THREADS \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } +#define PYOPENCL_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + std::cerr \ + << "PyOpenCL WARNING: a clean-up operation failed (dead context maybe?)" \ + << std::endl \ + << #NAME " failed with code " << status_code \ + << std::endl; \ + } + +// }}} + +// {{{ 
get_info helpers +#define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ + { \ + CL_TYPE param_value; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ + if (param_value) \ + return py::object(handle_from_new_ptr( \ + new TYPE(param_value, /*retain*/ true))); \ + else \ + return py::object(); \ + } + +#define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ + { \ + size_t size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, &size)); \ + \ + RES_VEC.resize(size / sizeof(RES_VEC.front())); \ + \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, size, \ + RES_VEC.empty( ) ? NULL : &RES_VEC.front(), &size)); \ + } + +#define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \ + { \ + size_t param_value_size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, ¶m_value_size)); \ + \ + std::vector param_value(param_value_size); \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, param_value_size, \ + param_value.empty( ) ? NULL : ¶m_value.front(), ¶m_value_size)); \ + \ + return py::object( \ + param_value.empty( ) ? 
"" : std::string(¶m_value.front(), param_value_size-1)); \ + } + + + + +#define PYOPENCL_GET_INTEGRAL_INFO(WHAT, FIRST_ARG, SECOND_ARG, TYPE) \ + { \ + TYPE param_value; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ + return py::object(param_value); \ + } + +// }}} + +// {{{ event helpers -------------------------------------------------------------- +#define PYOPENCL_PARSE_WAIT_FOR \ + cl_uint num_events_in_wait_list = 0; \ + std::vector event_wait_list; \ + \ + if (py_wait_for.ptr() != Py_None) \ + { \ + event_wait_list.resize(len(py_wait_for)); \ + PYTHON_FOREACH(evt, py_wait_for) \ + event_wait_list[num_events_in_wait_list++] = \ + py::extract(evt)().data(); \ + } + +#define PYOPENCL_WAITLIST_ARGS \ + num_events_in_wait_list, event_wait_list.empty( ) ? NULL : &event_wait_list.front() + +#define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \ + try \ + { \ + return new nanny_event(evt, false, obj); \ + } \ + catch (...) \ + { \ + clReleaseEvent(evt); \ + throw; \ + } + +#define PYOPENCL_RETURN_NEW_EVENT(evt) \ + try \ + { \ + return new event(evt, false); \ + } \ + catch (...) 
\ + { \ + clReleaseEvent(evt); \ + throw; \ + } + +// }}} + +// {{{ equality testing +#define PYOPENCL_EQUALITY_TESTS(cls) \ + bool operator==(cls const &other) const \ + { return data() == other.data(); } \ + bool operator!=(cls const &other) const \ + { return data() != other.data(); } \ + long hash() const \ + { return (long) (intptr_t) data(); } +// }}} + + + +namespace pyopencl +{ + // {{{ error + class error : public std::runtime_error + { + private: + const char *m_routine; + cl_int m_code; + + public: + error(const char *rout, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(rout), m_code(c) + { } + + const char *routine() const + { + return m_routine; + } + + cl_int code() const + { + return m_code; + } + + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + + }; + + // }}} + + + // {{{ buffer interface helper + // +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + class py_buffer_wrapper : public boost::noncopyable + { + private: + bool m_initialized; + + public: + Py_buffer m_buf; + + py_buffer_wrapper() + : m_initialized(false) + {} + + void get(PyObject *obj, int flags) + { + if (PyObject_GetBuffer(obj, &m_buf, flags)) + throw py::error_already_set(); + + m_initialized = true; + } + + virtual ~py_buffer_wrapper() + { + if (m_initialized) + PyBuffer_Release(&m_buf); + } + }; +#endif + + // }}} + + inline + py::tuple get_cl_header_version() + { + return py::make_tuple( + PYOPENCL_CL_VERSION >> (3*4), + (PYOPENCL_CL_VERSION >> (1*4)) & 0xff + ); + } + + + // {{{ platform + class platform : boost::noncopyable + { + private: + cl_platform_id m_platform; + + public: + platform(cl_platform_id pid) + : m_platform(pid) + { } + + platform(cl_platform_id pid, bool /*retain (ignored)*/) + : m_platform(pid) + { } + + cl_platform_id data() const + { + return m_platform; + } + + PYOPENCL_EQUALITY_TESTS(platform); + + py::object 
get_info(cl_platform_info param_name) const + { + switch (param_name) + { + case CL_PLATFORM_PROFILE: + case CL_PLATFORM_VERSION: + case CL_PLATFORM_NAME: + case CL_PLATFORM_VENDOR: +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + case CL_PLATFORM_EXTENSIONS: +#endif + PYOPENCL_GET_STR_INFO(Platform, m_platform, param_name); + + default: + throw error("Platform.get_info", CL_INVALID_VALUE); + } + } + + py::list get_devices(cl_device_type devtype); + }; + + + + + inline + py::list get_platforms() + { + cl_uint num_platforms = 0; + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, (0, 0, &num_platforms)); + + std::vector platforms(num_platforms); + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, + (num_platforms, platforms.empty( ) ? NULL : &platforms.front(), &num_platforms)); + + py::list result; + BOOST_FOREACH(cl_platform_id pid, platforms) + result.append(handle_from_new_ptr( + new platform(pid))); + + return result; + } + + // }}} + + // {{{ device + class device : boost::noncopyable + { + public: + enum reference_type_t { + REF_NOT_OWNABLE, + REF_FISSION_EXT, +#if PYOPENCL_CL_VERSION >= 0x1020 + REF_CL_1_2, +#endif + }; + private: + cl_device_id m_device; + reference_type_t m_ref_type; + + public: + device(cl_device_id did) + : m_device(did), m_ref_type(REF_NOT_OWNABLE) + { } + + device(cl_device_id did, bool retain, reference_type_t ref_type=REF_NOT_OWNABLE) + : m_device(did), m_ref_type(ref_type) + { + if (retain && ref_type != REF_NOT_OWNABLE) + { + if (false) + { } +#if (defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION)) + else if (ref_type == REF_FISSION_EXT) + { +#if PYOPENCL_CL_VERSION >= 0x1020 + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); +#endif + + PYOPENCL_GET_EXT_FUN(plat, + clRetainDeviceEXT, retain_func); + + PYOPENCL_CALL_GUARDED(retain_func, (did)); + } +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + else if (ref_type == REF_CL_1_2) + { + 
PYOPENCL_CALL_GUARDED(clRetainDevice, (did)); + } +#endif + + else + throw error("Device", CL_INVALID_VALUE, + "cannot own references to devices when device fission or CL 1.2 is not available"); + } + } + + ~device() + { + if (false) + { } +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + else if (m_ref_type == REF_FISSION_EXT) + { +#if PYOPENCL_CL_VERSION >= 0x1020 + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); +#endif + + PYOPENCL_GET_EXT_FUN(plat, + clReleaseDeviceEXT, release_func); + + PYOPENCL_CALL_GUARDED_CLEANUP(release_func, (m_device)); + } +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + else if (m_ref_type == REF_CL_1_2) + PYOPENCL_CALL_GUARDED(clReleaseDevice, (m_device)); +#endif + } + + cl_device_id data() const + { + return m_device; + } + + PYOPENCL_EQUALITY_TESTS(device); + + py::object get_info(cl_device_info param_name) const + { +#define DEV_GET_INT_INF(TYPE) \ + PYOPENCL_GET_INTEGRAL_INFO(Device, m_device, param_name, TYPE); + + switch (param_name) + { + case CL_DEVICE_TYPE: DEV_GET_INT_INF(cl_device_type); + case CL_DEVICE_VENDOR_ID: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_COMPUTE_UNITS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WORK_GROUP_SIZE: DEV_GET_INT_INF(size_t); + + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: 
DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_MAX_CLOCK_FREQUENCY: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_ADDRESS_BITS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_READ_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_IMAGE2D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE2D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_DEPTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE_SUPPORT: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_MAX_PARAMETER_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_MAX_SAMPLERS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MEM_BASE_ADDR_ALIGN: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SINGLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#ifdef CL_DEVICE_DOUBLE_FP_CONFIG + case CL_DEVICE_DOUBLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#endif +#ifdef CL_DEVICE_HALF_FP_CONFIG + case CL_DEVICE_HALF_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#endif + + case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: DEV_GET_INT_INF(cl_device_mem_cache_type); + case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_GLOBAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + + case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_MAX_CONSTANT_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_LOCAL_MEM_TYPE: DEV_GET_INT_INF(cl_device_local_mem_type); + case CL_DEVICE_LOCAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_ERROR_CORRECTION_SUPPORT: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_PROFILING_TIMER_RESOLUTION: DEV_GET_INT_INF(size_t); + case CL_DEVICE_ENDIAN_LITTLE: 
DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities); + case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); + + case CL_DEVICE_NAME: + case CL_DEVICE_VENDOR: + case CL_DRIVER_VERSION: + case CL_DEVICE_PROFILE: + case CL_DEVICE_VERSION: + case CL_DEVICE_EXTENSIONS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + + case CL_DEVICE_PLATFORM: + PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_platform_id, platform); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_HOST_UNIFIED_MEMORY: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_OPENCL_C_VERSION: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: + case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: + case CL_DEVICE_REGISTERS_PER_BLOCK_NV: + case CL_DEVICE_WARP_SIZE_NV: + DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_GPU_OVERLAP_NV: + case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: + case CL_DEVICE_INTEGRATED_MEMORY_NV: + DEV_GET_INT_INF(cl_bool); +#endif +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + case CL_DEVICE_PARENT_DEVICE_EXT: + PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); + case 
CL_DEVICE_PARTITION_TYPES_EXT: + case CL_DEVICE_AFFINITY_DOMAINS_EXT: + case CL_DEVICE_PARTITION_STYLE_EXT: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_device_partition_property_ext, result); + } + case CL_DEVICE_REFERENCE_COUNT_EXT: DEV_GET_INT_INF(cl_uint); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_BUILT_IN_KERNELS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_PARENT_DEVICE: + PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); + case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PARTITION_TYPE: + case CL_DEVICE_PARTITION_PROPERTIES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_device_partition_property, result); + } + case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result); + } + case CL_DEVICE_REFERENCE_COUNT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool); +#endif +// {{{ AMD dev attrs +// +// types of AMD dev attrs divined from +// https://www.khronos.org/registry/cl/api/1.2/cl.hpp +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD + case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: DEV_GET_INT_INF(cl_ulong); +#endif +/* FIXME +#ifdef CL_DEVICE_TOPOLOGY_AMD + case CL_DEVICE_TOPOLOGY_AMD: +#endif +*/ +#ifdef CL_DEVICE_BOARD_NAME_AMD + case CL_DEVICE_BOARD_NAME_AMD: ; + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD + case 
CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD + case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD + case CL_DEVICE_SIMD_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD + case CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD + case CL_DEVICE_WAVEFRONT_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD + case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD + case CL_DEVICE_LOCAL_MEM_BANKS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +// }}} + +#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT + case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint); +#endif + + default: + throw error("Device.get_info", CL_INVALID_VALUE); + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::list create_sub_devices(py::object py_properties) + { + std::vector properties; + + COPY_PY_LIST(cl_device_partition_property, properties); + properties.push_back(0); + + cl_device_partition_property *props_ptr + = properties.empty( ) ? 
NULL : &properties.front(); + + cl_uint num_entries; + PYOPENCL_CALL_GUARDED(clCreateSubDevices, + (m_device, props_ptr, 0, NULL, &num_entries)); + + std::vector result; + result.resize(num_entries); + + PYOPENCL_CALL_GUARDED(clCreateSubDevices, + (m_device, props_ptr, num_entries, &result.front(), NULL)); + + py::list py_result; + BOOST_FOREACH(cl_device_id did, result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did, /*retain*/true, + device::REF_CL_1_2))); + return py_result; + } +#endif + +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + py::list create_sub_devices_ext(py::object py_properties) + { + std::vector properties; + +#if PYOPENCL_CL_VERSION >= 0x1020 + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); +#endif + + PYOPENCL_GET_EXT_FUN(plat, clCreateSubDevicesEXT, create_sub_dev); + + COPY_PY_LIST(cl_device_partition_property_ext, properties); + properties.push_back(CL_PROPERTIES_LIST_END_EXT); + + cl_device_partition_property_ext *props_ptr + = properties.empty( ) ? 
NULL : &properties.front(); + + cl_uint num_entries; + PYOPENCL_CALL_GUARDED(create_sub_dev, + (m_device, props_ptr, 0, NULL, &num_entries)); + + std::vector result; + result.resize(num_entries); + + PYOPENCL_CALL_GUARDED(create_sub_dev, + (m_device, props_ptr, num_entries, &result.front(), NULL)); + + py::list py_result; + BOOST_FOREACH(cl_device_id did, result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did, /*retain*/true, + device::REF_FISSION_EXT))); + return py_result; + } +#endif + + }; + + + + + inline py::list platform::get_devices(cl_device_type devtype) + { + cl_uint num_devices = 0; + PYOPENCL_PRINT_CALL_TRACE("clGetDeviceIDs"); + { + cl_int status_code; + status_code = clGetDeviceIDs(m_platform, devtype, 0, 0, &num_devices); + if (status_code == CL_DEVICE_NOT_FOUND) + num_devices = 0; + else if (status_code != CL_SUCCESS) \ + throw pyopencl::error("clGetDeviceIDs", status_code); + } + + if (num_devices == 0) + return py::list(); + + std::vector devices(num_devices); + PYOPENCL_CALL_GUARDED(clGetDeviceIDs, + (m_platform, devtype, + num_devices, devices.empty( ) ? 
NULL : &devices.front(), &num_devices)); + + py::list result; + BOOST_FOREACH(cl_device_id did, devices) + result.append(handle_from_new_ptr( + new device(did))); + + return result; + } + + // }}} + + // {{{ context + class context : public boost::noncopyable + { + private: + cl_context m_context; + + public: + context(cl_context ctx, bool retain) + : m_context(ctx) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainContext, (ctx)); + } + + + ~context() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext, + (m_context)); + } + + cl_context data() const + { + return m_context; + } + + PYOPENCL_EQUALITY_TESTS(context); + + py::object get_info(cl_context_info param_name) const + { + switch (param_name) + { + case CL_CONTEXT_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO( + Context, m_context, param_name, cl_uint); + + case CL_CONTEXT_DEVICES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + py::list py_result; + BOOST_FOREACH(cl_device_id did, result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did))); + return py_result; + } + + case CL_CONTEXT_PROPERTIES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + py::list py_result; + for (size_t i = 0; i < result.size(); i+=2) + { + cl_context_properties key = result[i]; + py::object value; + switch (key) + { + case CL_CONTEXT_PLATFORM: + { + value = py::object( + handle_from_new_ptr(new platform( + reinterpret_cast(result[i+1])))); + break; + } + +#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) +#if defined(__APPLE__) && defined(HAVE_GL) + case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: +#else + case CL_GL_CONTEXT_KHR: + case CL_EGL_DISPLAY_KHR: + case CL_GLX_DISPLAY_KHR: + case CL_WGL_HDC_KHR: + case CL_CGL_SHAREGROUP_KHR: +#endif + value = py::object(result[i+1]); + break; + +#endif + case 0: + break; + + default: + throw error("Context.get_info", CL_INVALID_VALUE, + "unknown 
context_property key encountered"); + } + + py_result.append(py::make_tuple(result[i], value)); + } + return py_result; + } + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_CONTEXT_NUM_DEVICES: + PYOPENCL_GET_INTEGRAL_INFO( + Context, m_context, param_name, cl_uint); +#endif + + default: + throw error("Context.get_info", CL_INVALID_VALUE); + } + } + }; + + + + + inline + std::vector parse_context_properties( + py::object py_properties) + { + std::vector props; + + if (py_properties.ptr() != Py_None) + { + PYTHON_FOREACH(prop_tuple, py_properties) + { + if (len(prop_tuple) != 2) + throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2"); + cl_context_properties prop = + py::extract(prop_tuple[0]); + props.push_back(prop); + + if (prop == CL_CONTEXT_PLATFORM) + { + py::extract value(prop_tuple[1]); + props.push_back( + reinterpret_cast(value().data())); + } +#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) +#if defined(_WIN32) + else if (prop == CL_WGL_HDC_KHR) + { + // size_t is a stand-in for HANDLE, hopefully has the same size. 
+ size_t hnd = py::extract(prop_tuple[1]); + props.push_back(hnd); + } +#endif + else if ( +#if defined(__APPLE__) && defined(HAVE_GL) + prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE +#else + prop == CL_GL_CONTEXT_KHR + || prop == CL_EGL_DISPLAY_KHR + || prop == CL_GLX_DISPLAY_KHR + || prop == CL_CGL_SHAREGROUP_KHR +#endif + ) + { + py::object ctypes = py::import("ctypes"); + py::object prop = prop_tuple[1], c_void_p = ctypes.attr("c_void_p"); + py::object ptr = ctypes.attr("cast")(prop, c_void_p); + py::extract value(ptr.attr("value")); + props.push_back(value); + } +#endif + else + throw error("Context", CL_INVALID_VALUE, "invalid context property"); + } + props.push_back(0); + } + + return props; + } + + + + + inline + context *create_context_inner(py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + std::vector props + = parse_context_properties(py_properties); + + cl_context_properties *props_ptr + = props.empty( ) ? NULL : &props.front(); + + cl_int status_code; + + cl_context ctx; + + // from device list + if (py_devices.ptr() != Py_None) + { + if (py_dev_type.ptr() != Py_None) + throw error("Context", CL_INVALID_VALUE, + "one of 'devices' or 'dev_type' must be None"); + + std::vector devices; + PYTHON_FOREACH(py_dev, py_devices) + { + py::extract dev(py_dev); + devices.push_back(dev().data()); + } + + PYOPENCL_PRINT_CALL_TRACE("clCreateContext"); + ctx = clCreateContext( + props_ptr, + devices.size(), + devices.empty( ) ? NULL : &devices.front(), + 0, 0, &status_code); + } + // from dev_type + else + { + cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; + if (py_dev_type.ptr() != Py_None) + dev_type = py::extract(py_dev_type)(); + + PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType"); + ctx = clCreateContextFromType(props_ptr, dev_type, 0, 0, &status_code); + } + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Context", status_code); + + try + { + return new context(ctx, false); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseContext, (ctx)); + throw; + } + } + + + + + inline + context *create_context(py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_context_inner(py_devices, py_properties, py_dev_type); + ) + } + + + + + + // }}} + + // {{{ command_queue + class command_queue + { + private: + cl_command_queue m_queue; + + public: + command_queue(cl_command_queue q, bool retain) + : m_queue(q) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q)); + } + + command_queue(command_queue const &src) + : m_queue(src.m_queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue( + const context &ctx, + const device *py_dev=0, + cl_command_queue_properties props=0) + { + cl_device_id dev; + if (py_dev) + dev = py_dev->data(); + else + { + std::vector devs; + PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs); + if (devs.size() == 0) + throw pyopencl::error("CommandQueue", CL_INVALID_VALUE, + "context doesn't have any devices? 
-- don't know which one to default to"); + dev = devs[0]; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); + m_queue = clCreateCommandQueue( + ctx.data(), dev, props, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } + + ~command_queue() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, + (m_queue)); + } + + const cl_command_queue data() const + { return m_queue; } + + PYOPENCL_EQUALITY_TESTS(command_queue); + + py::object get_info(cl_command_queue_info param_name) const + { + switch (param_name) + { + case CL_QUEUE_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + cl_context, context); + case CL_QUEUE_DEVICE: + PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + cl_device_id, device); + case CL_QUEUE_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + cl_uint); + case CL_QUEUE_PROPERTIES: + PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + cl_command_queue_properties); + + default: + throw error("CommandQueue.get_info", CL_INVALID_VALUE); + } + } + + std::auto_ptr get_context() const + { + cl_context param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), &param_value, 0)); + return std::auto_ptr( + new context(param_value, /*retain*/ true)); + } + +#if PYOPENCL_CL_VERSION < 0x1010 + cl_command_queue_properties set_property( + cl_command_queue_properties prop, + bool enable) + { + cl_command_queue_properties old_prop; + PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty, + (m_queue, prop, PYOPENCL_CAST_BOOL(enable), &old_prop)); + return old_prop; + } +#endif + + void flush() + { PYOPENCL_CALL_GUARDED(clFlush, (m_queue)); } + void finish() + { PYOPENCL_CALL_GUARDED_THREADED(clFinish, (m_queue)); } + }; + + // }}} + + // {{{ event/synchronization + class event : boost::noncopyable + { + private: + cl_event m_event; + + public: + event(cl_event
event, bool retain) + : m_event(event) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainEvent, (event)); + } + + event(event const &src) + : m_event(src.m_event) + { PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event)); } + + virtual ~event() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent, + (m_event)); + } + + const cl_event data() const + { return m_event; } + + PYOPENCL_EQUALITY_TESTS(event); + + py::object get_info(cl_event_info param_name) const + { + switch (param_name) + { + case CL_EVENT_COMMAND_QUEUE: + PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, + cl_command_queue, command_queue); + case CL_EVENT_COMMAND_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_command_type); + case CL_EVENT_COMMAND_EXECUTION_STATUS: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_int); + case CL_EVENT_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_uint); +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_EVENT_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, + cl_context, context); +#endif + + default: + throw error("Event.get_info", CL_INVALID_VALUE); + } + } + + py::object get_profiling_info(cl_profiling_info param_name) const + { + switch (param_name) + { + case CL_PROFILING_COMMAND_QUEUED: + case CL_PROFILING_COMMAND_SUBMIT: + case CL_PROFILING_COMMAND_START: + case CL_PROFILING_COMMAND_END: + PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name, + cl_ulong); + default: + throw error("Event.get_profiling_info", CL_INVALID_VALUE); + } + } + + virtual void wait() + { + PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); + } + }; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + class nanny_event : public event + { + // In addition to everything an event does, the nanny event holds a reference + // to a Python object and waits for its own completion upon destruction. 
+ + protected: + std::auto_ptr m_ward; + + public: + + nanny_event(cl_event evt, bool retain, std::auto_ptr &ward) + : event(evt, retain), m_ward(ward) + { } + + ~nanny_event() + { wait(); } + + py::object get_ward() const + { + if (m_ward.get()) + { + return py::object(py::handle<>(py::borrowed( + m_ward->m_buf.obj))); + } + else + return py::object(); + } + + virtual void wait() + { + event::wait(); + m_ward.reset(); + } + }; +#else + class nanny_event : public event + { + // In addition to everything an event does, the nanny event holds a reference + // to a Python object and waits for its own completion upon destruction. + + protected: + py::object m_ward; + + public: + + nanny_event(cl_event evt, bool retain, py::object ward) + : event(evt, retain), m_ward(ward) + { } + + nanny_event(nanny_event const &src) + : event(src), m_ward(src.m_ward) + { } + + ~nanny_event() + { wait(); } + + py::object get_ward() const + { return m_ward; } + + virtual void wait() + { + event::wait(); + m_ward = py::object(); + } + }; +#endif + + + + + inline + void wait_for_events(py::object events) + { + cl_uint num_events_in_wait_list = 0; + std::vector event_wait_list(len(events)); + + PYTHON_FOREACH(evt, events) + event_wait_list[num_events_in_wait_list++] = + py::extract(evt)().data(); + + PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, ( + PYOPENCL_WAITLIST_ARGS)); + } + + + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_marker_with_wait_list(command_queue &cq, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, ( + cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + inline + event *enqueue_barrier_with_wait_list(command_queue &cq, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueBarrierWithWaitList, + (cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + 
+ // {{{ used internally for pre-OpenCL-1.2 contexts + + inline + event *enqueue_marker(command_queue &cq) + { + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, ( + cq.data(), &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + inline + void enqueue_wait_for_events(command_queue &cq, py::object py_events) + { + cl_uint num_events = 0; + std::vector event_list(len(py_events)); + + PYTHON_FOREACH(py_evt, py_events) + event_list[num_events++] = + py::extract(py_evt)().data(); + + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( + cq.data(), num_events, event_list.empty( ) ? NULL : &event_list.front())); + } + + inline + void enqueue_barrier(command_queue &cq) + { + PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data())); + } + + // }}} + + +#if PYOPENCL_CL_VERSION >= 0x1010 + class user_event : public event + { + public: + user_event(cl_event evt, bool retain) + : event(evt, retain) + { } + + void set_status(cl_int execution_status) + { + PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status)); + } + }; + + + + + inline + event *create_user_event(context &ctx) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent"); + cl_event evt = clCreateUserEvent(ctx.data(), &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("UserEvent", status_code); + + try + { + return new user_event(evt, false); + } + catch (...) 
+ { + clReleaseEvent(evt); + throw; + } + } + +#endif + + // }}} + + // {{{ memory_object + + py::object create_mem_object_wrapper(cl_mem mem); + + class memory_object_holder + { + public: + virtual const cl_mem data() const = 0; + + PYOPENCL_EQUALITY_TESTS(memory_object_holder); + + size_t size() const + { + size_t param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(param_value), &param_value, 0)); + return param_value; + } + + py::object get_info(cl_mem_info param_name) const; + }; + + + + + class memory_object : boost::noncopyable, public memory_object_holder + { + public: +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + typedef std::auto_ptr hostbuf_t; +#else + typedef py::object hostbuf_t; +#endif + + private: + bool m_valid; + cl_mem m_mem; + hostbuf_t m_hostbuf; + + public: + memory_object(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : m_valid(true), m_mem(mem) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem)); + + m_hostbuf = hostbuf; + } + + memory_object(memory_object &src) + : m_valid(true), m_mem(src.m_mem), m_hostbuf(src.m_hostbuf) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + memory_object(memory_object_holder const &src) + : m_valid(true), m_mem(src.data()) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + void release() + { + if (!m_valid) + throw error("MemoryObject.free", CL_INVALID_VALUE, + "trying to double-unref mem object"); + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem)); + m_valid = false; + } + + virtual ~memory_object() + { + if (m_valid) + release(); + } + + py::object hostbuf() + { +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (m_hostbuf.get()) + { + return py::object(py::handle<>(py::borrowed( + m_hostbuf->m_buf.obj))); + } + else + return py::object(); +#else + return m_hostbuf; +#endif + } + + const cl_mem data() const + { return m_mem; } + + }; + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_migrate_mem_objects( +
command_queue &cq, + py::object py_mem_objects, + cl_mem_migration_flags flags, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + std::vector mem_objects; + PYTHON_FOREACH(mo, py_mem_objects) + mem_objects.push_back(py::extract(mo)().data()); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, ( + cq.data(), + mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), + flags, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + +#ifdef cl_ext_migrate_memobject + inline + event *enqueue_migrate_mem_object_ext( + command_queue &cq, + py::object py_mem_objects, + cl_mem_migration_flags_ext flags, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + +#if PYOPENCL_CL_VERSION >= 0x1020 + // {{{ get platform + cl_device_id dev; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (cq.data(), CL_QUEUE_DEVICE, + sizeof(dev), &dev, NULL)); + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); + // }}} +#endif + + PYOPENCL_GET_EXT_FUN(plat, + clEnqueueMigrateMemObjectEXT, enqueue_migrate_fn); + + std::vector mem_objects; + PYTHON_FOREACH(mo, py_mem_objects) + mem_objects.push_back(py::extract(mo)().data()); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(enqueue_migrate_fn, ( + cq.data(), + mem_objects.size(), mem_objects.empty( ) ? 
NULL : &mem_objects.front(), + flags, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + // {{{ buffer + + inline cl_mem create_buffer( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); + cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("create_buffer", status_code); + + return mem; + } + + + + + inline cl_mem create_buffer_gc( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_buffer(ctx, flags, size, host_ptr); + ); + } + + + +#if PYOPENCL_CL_VERSION >= 0x1010 + inline cl_mem create_sub_buffer( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer"); + cl_mem mem = clCreateSubBuffer(buffer, flags, + bct, buffer_create_info, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateSubBuffer", status_code); + + return mem; + } + + + + + inline cl_mem create_sub_buffer_gc( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_sub_buffer(buffer, flags, bct, buffer_create_info); + ); + } +#endif + + + + class buffer : public memory_object + { + public: + buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, hostbuf) + { } + +#if PYOPENCL_CL_VERSION >= 0x1010 + buffer *get_sub_region( + size_t origin, size_t size, cl_mem_flags flags) const + { + cl_buffer_region region = { origin, size}; + + cl_mem mem = create_sub_buffer_gc( + data(), flags, CL_BUFFER_CREATE_TYPE_REGION, &region); + + try + { + return new buffer(mem, false); + } + catch (...)
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + buffer *getitem(py::slice slc) const + { + PYOPENCL_BUFFER_SIZE_T start, end, stride, length; + + size_t my_length; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0)); + +#if PY_VERSION_HEX >= 0x03020000 + if (PySlice_GetIndicesEx(slc.ptr(), +#else + if (PySlice_GetIndicesEx(reinterpret_cast(slc.ptr()), +#endif + my_length, &start, &end, &stride, &length) != 0) + throw py::error_already_set(); + + if (stride != 1) + throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, + "Buffer slice must have stride 1"); + + cl_mem_flags my_flags; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0)); + + my_flags &= ~CL_MEM_COPY_HOST_PTR; + + if (end <= start) + throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, + "Buffer slice have end > start"); + + return get_sub_region(start, end-start, my_flags); + } +#endif + }; + + // {{{ buffer creation + + inline + buffer *create_buffer_py( + context &ctx, + cl_mem_flags flags, + size_t size, + py::object py_hostbuf + ) + { + if (py_hostbuf.ptr() != Py_None && + !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " + "but no memory flags to make use of it."); + + void *buf = 0; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr retained_buf_obj; + if (py_hostbuf.ptr() != Py_None) + { + retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(py_hostbuf.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + + if (size > size_t(retained_buf_obj->m_buf.len)) + throw pyopencl::error("Buffer", CL_INVALID_VALUE, + "specified size is greater than host buffer size"); + 
if (size == 0) + size = retained_buf_obj->m_buf.len; + } +#else + py::object retained_buf_obj; + if (py_hostbuf.ptr() != Py_None) + { + PYOPENCL_BUFFER_SIZE_T len; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(py_hostbuf.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + py_hostbuf.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = py_hostbuf; + + if (size > size_t(len)) + throw pyopencl::error("Buffer", CL_INVALID_VALUE, + "specified size is greater than host buffer size"); + if (size == 0) + size = len; + } +#endif + + cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new buffer(mem, false, retained_buf_obj); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + // }}} + + // {{{ buffer transfers + + // {{{ byte-for-byte transfers + + inline + event *enqueue_read_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + size_t device_offset, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + + void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; + len = ward->m_buf.len; +#else + py::object ward = buffer; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + device_offset, len, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + size_t device_offset, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; + len = ward->m_buf.len; +#else + py::object ward = buffer; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + device_offset, len, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_buffer( + command_queue &cq, + 
memory_object_holder &src, + memory_object_holder &dst, + ptrdiff_t byte_count, + size_t src_offset, + size_t dst_offset, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + /* A negative byte_count means "copy as much as fits": query both buffers and clamp to the smaller one. The destination size must come from dst, not src, or the copy can overrun dst. */ + if (byte_count < 0) + { + size_t byte_count_src = 0; + size_t byte_count_dst = 0; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (src.data(), CL_MEM_SIZE, sizeof(byte_count_src), &byte_count_src, 0)); + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (dst.data(), CL_MEM_SIZE, sizeof(byte_count_dst), &byte_count_dst, 0)); + byte_count = std::min(byte_count_src, byte_count_dst); + } + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, ( + cq.data(), + src.data(), dst.data(), + src_offset, dst_offset, + byte_count, + PYOPENCL_WAITLIST_ARGS, + &evt + )) + ); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + // }}} + + // {{{ rectangular transfers +#if PYOPENCL_CL_VERSION >= 0x1010 + inline + event *enqueue_read_buffer_rect( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::object py_buffer_pitches, + py::object py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, +
buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_buffer_rect( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::object py_buffer_pitches, + py::object py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + const void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, + buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_buffer_rect( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dst, + py::object py_src_origin, + py::object py_dst_origin, + py::object py_region, + py::object py_src_pitches, + py::object py_dst_pitches, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(src_origin); + COPY_PY_COORD_TRIPLE(dst_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(src_pitches); + COPY_PY_PITCH_TUPLE(dst_pitches); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + 
PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, ( + cq.data(), + src.data(), dst.data(), + src_origin, dst_origin, region, + src_pitches[0], src_pitches[1], + dst_pitches[0], dst_pitches[1], + PYOPENCL_WAITLIST_ARGS, + &evt + )) + ); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + +#endif + + // }}} + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_fill_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object pattern, + size_t offset, + size_t size, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + const void *pattern_buf; + PYOPENCL_BUFFER_SIZE_T pattern_len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS); + + pattern_buf = ward->m_buf.buf; + pattern_len = ward->m_buf.len; +#else + if (PyObject_AsReadBuffer(pattern.ptr(), &pattern_buf, &pattern_len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueFillBuffer, ( + cq.data(), + mem.data(), + pattern_buf, pattern_len, offset, size, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + // {{{ image + + class image : public memory_object + { + public: + image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, hostbuf) + { } + + py::object get_image_info(cl_image_info param_name) const + { + switch (param_name) + { + case CL_IMAGE_FORMAT: + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, + cl_image_format); + case CL_IMAGE_ELEMENT_SIZE: + case CL_IMAGE_ROW_PITCH: + case CL_IMAGE_SLICE_PITCH: + case CL_IMAGE_WIDTH: + case CL_IMAGE_HEIGHT: + case CL_IMAGE_DEPTH: +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_IMAGE_ARRAY_SIZE: +#endif + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, size_t); + +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_IMAGE_BUFFER: + { + cl_mem param_value; + PYOPENCL_CALL_GUARDED(clGetImageInfo, \ + 
(data(), param_name, sizeof(param_value), ¶m_value, 0)); + if (param_value == 0) + { + // no associated memory object? no problem. + return py::object(); + } + + return create_mem_object_wrapper(param_value); + } + + case CL_IMAGE_NUM_MIP_LEVELS: + case CL_IMAGE_NUM_SAMPLES: + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, cl_uint); +#endif + + default: + throw error("MemoryObject.get_image_info", CL_INVALID_VALUE); + } + } + }; + + + + + // {{{ image formats + + inline + cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp) + { + std::auto_ptr result(new cl_image_format); + result->image_channel_order = ord; + result->image_channel_data_type = tp; + return result.release(); + } + + inline + py::list get_supported_image_formats( + context const &ctx, + cl_mem_flags flags, + cl_mem_object_type image_type) + { + cl_uint num_image_formats; + PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( + ctx.data(), flags, image_type, + 0, NULL, &num_image_formats)); + + std::vector formats(num_image_formats); + PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( + ctx.data(), flags, image_type, + formats.size(), formats.empty( ) ? 
NULL : &formats.front(), NULL)); + + PYOPENCL_RETURN_VECTOR(cl_image_format, formats); + } + + inline + cl_uint get_image_format_channel_count(cl_image_format const &fmt) + { + switch (fmt.image_channel_order) + { + case CL_R: return 1; + case CL_A: return 1; + case CL_RG: return 2; + case CL_RA: return 2; + case CL_RGB: return 3; + case CL_RGBA: return 4; + case CL_BGRA: return 4; + case CL_INTENSITY: return 1; + case CL_LUMINANCE: return 1; + default: + throw pyopencl::error("ImageFormat.channel_dtype_size", + CL_INVALID_VALUE, + "unrecognized channel order"); + } + } + + inline + cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt) + { + switch (fmt.image_channel_data_type) + { + case CL_SNORM_INT8: return 1; + case CL_SNORM_INT16: return 2; + case CL_UNORM_INT8: return 1; + case CL_UNORM_INT16: return 2; + case CL_UNORM_SHORT_565: return 2; + case CL_UNORM_SHORT_555: return 2; + case CL_UNORM_INT_101010: return 4; + case CL_SIGNED_INT8: return 1; + case CL_SIGNED_INT16: return 2; + case CL_SIGNED_INT32: return 4; + case CL_UNSIGNED_INT8: return 1; + case CL_UNSIGNED_INT16: return 2; + case CL_UNSIGNED_INT32: return 4; + case CL_HALF_FLOAT: return 2; + case CL_FLOAT: return 4; + default: + throw pyopencl::error("ImageFormat.channel_dtype_size", + CL_INVALID_VALUE, + "unrecognized channel data type"); + } + } + + inline + cl_uint get_image_format_item_size(cl_image_format const &fmt) + { + return get_image_format_channel_count(fmt) + * get_image_format_channel_dtype_size(fmt); + } + + // }}} + + // {{{ image creation + + inline + image *create_image( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + py::object shape, + py::object pitches, + py::object buffer) + { + if (shape.ptr() == Py_None) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "'shape' must be given"); + + void *buf = 0; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr retained_buf_obj; + if (buffer.ptr() != 
Py_None) + { + retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(buffer.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + len = retained_buf_obj->m_buf.len; + } +#else + py::object retained_buf_obj; + if (buffer.ptr() != Py_None) + { + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + buffer.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = buffer; + } +#endif + + unsigned dims = py::len(shape); + cl_int status_code; + cl_mem mem; + if (dims == 2) + { + size_t width = py::extract(shape[0]); + size_t height = py::extract(shape[1]); + + size_t pitch = 0; + if (pitches.ptr() != Py_None) + { + if (py::len(pitches) != 1) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid length of pitch tuple"); + pitch = py::extract(pitches[0]); + } + + // check buffer size + cl_int itemsize = get_image_format_item_size(fmt); + if (buf && std::max(pitch, width*itemsize)*height > cl_uint(len)) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "buffer too small"); + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage2D"); + PYOPENCL_RETRY_IF_MEM_ERROR( + { + mem = clCreateImage2D(ctx.data(), flags, &fmt, + width, height, pitch, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage2D", status_code); + } ); + + } + else if (dims == 3) + { + size_t width = py::extract(shape[0]); + size_t height = py::extract(shape[1]); + size_t depth = py::extract(shape[2]); + + size_t pitch_x = 0; + size_t pitch_y = 0; + + if (pitches.ptr() != Py_None) + { + if 
(py::len(pitches) != 2) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid length of pitch tuple"); + + pitch_x = py::extract(pitches[0]); + pitch_y = py::extract(pitches[1]); + } + + // check buffer size + cl_int itemsize = get_image_format_item_size(fmt); + if (buf && + std::max(std::max(pitch_x, width*itemsize)*height, pitch_y) + * depth > cl_uint(len)) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "buffer too small"); + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage3D"); + PYOPENCL_RETRY_IF_MEM_ERROR( + { + mem = clCreateImage3D(ctx.data(), flags, &fmt, + width, height, depth, pitch_x, pitch_y, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage3D", status_code); + } ); + } + else + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid dimension"); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new image(mem, false, retained_buf_obj); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + + inline + image *create_image_from_desc( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + cl_image_desc &desc, + py::object buffer) + { + if (buffer.ptr() != Py_None && + !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " + "but no memory flags to make use of it."); + + void *buf = 0; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr retained_buf_obj; + if (buffer.ptr() != Py_None) + { + retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(buffer.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + } +#else + py::object retained_buf_obj; + PYOPENCL_BUFFER_SIZE_T len; + if (buffer.ptr() != Py_None) + { + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + buffer.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = buffer; + } +#endif + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage"); + cl_int status_code; + cl_mem mem = clCreateImage(ctx.data(), flags, &fmt, &desc, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage", status_code); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new image(mem, false, retained_buf_obj); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + +#endif + + // }}} + + // {{{ image transfers + + inline + event *enqueue_read_image( + command_queue &cq, + image &img, + py::object py_origin, py::object py_region, + py::object buffer, + size_t row_pitch, size_t slice_pitch, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueReadImage, ( + cq.data(), + img.data(), + PYOPENCL_CAST_BOOL(is_blocking), + origin, region, row_pitch, slice_pitch, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_image( + command_queue &cq, + image &img, + py::object py_origin, py::object py_region, + py::object buffer, + size_t row_pitch, size_t slice_pitch, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + const void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueWriteImage, ( + cq.data(), + img.data(), + PYOPENCL_CAST_BOOL(is_blocking), + origin, region, row_pitch, slice_pitch, buf, + 
PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_image( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + py::object py_src_origin, + py::object py_dest_origin, + py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(src_origin); + COPY_PY_COORD_TRIPLE(dest_origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyImage, ( + cq.data(), src.data(), dest.data(), + src_origin, dest_origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_copy_image_to_buffer( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + py::object py_origin, + py::object py_region, + size_t offset, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyImageToBuffer, ( + cq.data(), src.data(), dest.data(), + origin, region, offset, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_copy_buffer_to_image( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + size_t offset, + py::object py_origin, + py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferToImage, ( + cq.data(), src.data(), dest.data(), + offset, origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_fill_image( + command_queue &cq, + memory_object_holder &mem, + 
py::object color, + py::object py_origin, py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + const void *color_buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::auto_ptr ward(new py_buffer_wrapper); + + ward->get(color.ptr(), PyBUF_ANY_CONTIGUOUS); + + color_buf = ward->m_buf.buf; +#else + PYOPENCL_BUFFER_SIZE_T color_len; + if (PyObject_AsReadBuffer(color.ptr(), &color_buf, &color_len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueFillImage, ( + cq.data(), + mem.data(), + color_buf, origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + // {{{ maps + class memory_map + { + private: + bool m_valid; + command_queue m_queue; + memory_object m_mem; + void *m_ptr; + + public: + memory_map(command_queue &cq, memory_object const &mem, void *ptr) + : m_valid(true), m_queue(cq), m_mem(mem), m_ptr(ptr) + { + } + + ~memory_map() + { + if (m_valid) + delete release(0, py::object()); + } + + event *release(command_queue *cq, py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (cq == 0) + cq = &m_queue; + + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueUnmapMemObject, ( + cq->data(), m_mem.data(), m_ptr, + PYOPENCL_WAITLIST_ARGS, &evt + )); + + m_valid = false; + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + }; + + + + + inline + py::object enqueue_map_buffer( + command_queue &cq, + memory_object_holder &buf, + cl_map_flags flags, + size_t offset, + py::object py_shape, py::object dtype, + py::object py_order, py::object py_strides, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; + + npy_uintp size_in_bytes = tp_descr->elsize; + BOOST_FOREACH(npy_intp sdim, shape) + size_in_bytes *= sdim; + + py::handle<> result; + + cl_event evt; + cl_int status_code; + 
PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapBuffer"); + void *mapped; + + PYOPENCL_RETRY_IF_MEM_ERROR( + { + Py_BEGIN_ALLOW_THREADS + mapped = clEnqueueMapBuffer( + cq.data(), buf.data(), + PYOPENCL_CAST_BOOL(is_blocking), flags, + offset, size_in_bytes, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + Py_END_ALLOW_THREADS + if (status_code != CL_SUCCESS) + throw pyopencl::error("clEnqueueMapBuffer", status_code); + } ); + + event evt_handle(evt, false); + + std::auto_ptr map; + try + { + result = py::handle<>(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + shape.size(), + shape.empty() ? NULL : &shape.front(), + strides.empty() ? NULL : &strides.front(), + mapped, ary_flags, /*obj*/NULL)); + + if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.get())) + throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, + "miscalculated numpy array size (not contiguous?)"); + + map = std::auto_ptr(new memory_map(cq, buf, mapped)); + } + catch (...) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( + cq.data(), buf.data(), mapped, 0, 0, 0)); + throw; + } + + py::handle<> map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.get()) = map_py.get(); + Py_INCREF(map_py.get()); + + return py::make_tuple( + result, + handle_from_new_ptr(new event(evt_handle))); + } + + + + + inline + py::object enqueue_map_image( + command_queue &cq, + memory_object_holder &img, + cl_map_flags flags, + py::object py_origin, + py::object py_region, + py::object py_shape, py::object dtype, + py::object py_order, py::object py_strides, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapImage"); + size_t row_pitch, slice_pitch; + void *mapped; + PYOPENCL_RETRY_IF_MEM_ERROR( + { + Py_BEGIN_ALLOW_THREADS + mapped = clEnqueueMapImage( + cq.data(), img.data(), + 
PYOPENCL_CAST_BOOL(is_blocking), flags, + origin, region, &row_pitch, &slice_pitch, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + Py_END_ALLOW_THREADS + if (status_code != CL_SUCCESS) + throw pyopencl::error("clEnqueueMapImage", status_code); + } ); + + event evt_handle(evt, false); + + std::auto_ptr map; + try + { + map = std::auto_ptr(new memory_map(cq, img, mapped)); + } + catch (...) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( + cq.data(), img.data(), mapped, 0, 0, 0)); + throw; + } + + py::handle<> result = py::handle<>(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + shape.size(), + shape.empty() ? NULL : &shape.front(), + strides.empty() ? NULL : &strides.front(), + mapped, ary_flags, /*obj*/NULL)); + + py::handle<> map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.get()) = map_py.get(); + Py_INCREF(map_py.get()); + + return py::make_tuple( + result, + handle_from_new_ptr(new event(evt_handle)), + row_pitch, slice_pitch); + } + + // }}} + + // {{{ sampler + class sampler : boost::noncopyable + { + private: + cl_sampler m_sampler; + + public: + sampler(context const &ctx, bool normalized_coordinates, + cl_addressing_mode am, cl_filter_mode fm) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSampler"); + m_sampler = clCreateSampler( + ctx.data(), + normalized_coordinates, + am, fm, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } + + sampler(cl_sampler samp, bool retain) + : m_sampler(samp) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainSampler, (samp)); + } + + ~sampler() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseSampler, (m_sampler)); + } + + cl_sampler data() const + { + return m_sampler; + } + + PYOPENCL_EQUALITY_TESTS(sampler); + + py::object get_info(cl_sampler_info param_name) const + { + switch (param_name) + { + case CL_SAMPLER_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_uint); + case 
CL_SAMPLER_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Sampler, m_sampler, param_name, + cl_context, context); + case CL_SAMPLER_ADDRESSING_MODE: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_addressing_mode); + case CL_SAMPLER_FILTER_MODE: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_filter_mode); + case CL_SAMPLER_NORMALIZED_COORDS: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_bool); + + default: + throw error("Sampler.get_info", CL_INVALID_VALUE); + } + } + }; + + // }}} + + // {{{ program + + class program : boost::noncopyable + { + public: + enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY }; + + private: + cl_program m_program; + program_kind_type m_program_kind; + + public: + program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN) + : m_program(prog), m_program_kind(progkind) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainProgram, (prog)); + } + + ~program() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program)); + } + + cl_program data() const + { + return m_program; + } + + program_kind_type kind() const + { + return m_program_kind; + } + + PYOPENCL_EQUALITY_TESTS(program); + + py::object get_info(cl_program_info param_name) const + { + switch (param_name) + { + case CL_PROGRAM_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + cl_uint); + case CL_PROGRAM_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name, + cl_context, context); + case CL_PROGRAM_NUM_DEVICES: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + cl_uint); + case CL_PROGRAM_DEVICES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); + + py::list py_result; + BOOST_FOREACH(cl_device_id did, result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did))); + return py_result; + } + case CL_PROGRAM_SOURCE: + PYOPENCL_GET_STR_INFO(Program, m_program, param_name); + case 
CL_PROGRAM_BINARY_SIZES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } + case CL_PROGRAM_BINARIES: + // {{{ + { + std::vector sizes; + PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes); + + size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); + + boost::scoped_array result( + new unsigned char[total_size]); + std::vector result_ptrs; + + unsigned char *ptr = result.get(); + for (unsigned i = 0; i < sizes.size(); ++i) + { + result_ptrs.push_back(ptr); + ptr += sizes[i]; + } + + PYOPENCL_CALL_GUARDED(clGetProgramInfo, + (m_program, param_name, sizes.size()*sizeof(unsigned char *), + result_ptrs.empty( ) ? NULL : &result_ptrs.front(), 0)); \ + + py::list py_result; + ptr = result.get(); + for (unsigned i = 0; i < sizes.size(); ++i) + { + py::handle<> binary_pyobj( +#if PY_VERSION_HEX >= 0x03000000 + PyBytes_FromStringAndSize( + reinterpret_cast(ptr), sizes[i]) +#else + PyString_FromStringAndSize( + reinterpret_cast(ptr), sizes[i]) +#endif + ); + py_result.append(binary_pyobj); + ptr += sizes[i]; + } + return py_result; + } + // }}} +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_PROGRAM_NUM_KERNELS: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + size_t); + case CL_PROGRAM_KERNEL_NAMES: + PYOPENCL_GET_STR_INFO(Program, m_program, param_name); +#endif + + default: + throw error("Program.get_info", CL_INVALID_VALUE); + } + } + + py::object get_build_info( + device const &dev, + cl_program_build_info param_name) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack + case CL_PROGRAM_BUILD_STATUS: + PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + cl_build_status); + case CL_PROGRAM_BUILD_OPTIONS: + case CL_PROGRAM_BUILD_LOG: + PYOPENCL_GET_STR_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name); +#if PYOPENCL_CL_VERSION >= 0x1020 + case 
CL_PROGRAM_BINARY_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + cl_program_binary_type); +#endif +#undef PYOPENCL_FIRST_ARG + + default: + throw error("Program.get_build_info", CL_INVALID_VALUE); + } + } + + void build(std::string options, py::object py_devices) + { + PYOPENCL_PARSE_PY_DEVICES; + + PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram, + (m_program, num_devices, devices, + options.c_str(), 0 ,0)); + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + void compile(std::string options, py::object py_devices, + py::object py_headers) + { + PYOPENCL_PARSE_PY_DEVICES; + + // {{{ pick apart py_headers + // py_headers is a list of tuples *(name, program)* + + std::vector header_names; + std::vector programs; + PYTHON_FOREACH(name_hdr_tup, py_headers) + { + if (py::len(name_hdr_tup) != 2) + throw error("Program.compile", CL_INVALID_VALUE, + "epxected (name, header) tuple in headers list"); + std::string name = py::extract(name_hdr_tup[0]); + program &prg = py::extract(name_hdr_tup[1]); + + header_names.push_back(name); + programs.push_back(prg.data()); + } + + std::vector header_name_ptrs; + BOOST_FOREACH(std::string const &name, header_names) + header_name_ptrs.push_back(name.c_str()); + + // }}} + + PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram, + (m_program, num_devices, devices, + options.c_str(), header_names.size(), + programs.empty() ? NULL : &programs.front(), + header_name_ptrs.empty() ? 
NULL : &header_name_ptrs.front(), + 0, 0)); + } +#endif + }; + + + + + inline + program *create_program_with_source( + context &ctx, + std::string const &src) + { + const char *string = src.c_str(); + size_t length = src.size(); + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource"); + cl_program result = clCreateProgramWithSource( + ctx.data(), 1, &string, &length, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithSource", status_code); + + try + { + return new program(result, false, program::KND_SOURCE); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } + + + + + + inline + program *create_program_with_binary( + context &ctx, + py::object py_devices, + py::object py_binaries) + { + std::vector devices; + std::vector binaries; + std::vector sizes; + std::vector binary_statuses; + + int num_devices = len(py_devices); + if (len(py_binaries) != num_devices) + throw error("create_program_with_binary", CL_INVALID_VALUE, + "device and binary counts don't match"); + + for (int i = 0; i < num_devices; ++i) + { + devices.push_back( + py::extract(py_devices[i])().data()); + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + py_buffer_wrapper buf_wrapper; + + buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = buf_wrapper.m_buf.buf; + len = buf_wrapper.m_buf.len; +#else + if (PyObject_AsReadBuffer( + py::object(py_binaries[i]).ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + binaries.push_back(reinterpret_cast(buf)); + sizes.push_back(len); + } + + binary_statuses.resize(num_devices); + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); + cl_program result = clCreateProgramWithBinary( + ctx.data(), num_devices, + devices.empty( ) ? NULL : &devices.front(), + sizes.empty( ) ? NULL : &sizes.front(), + binaries.empty( ) ? NULL : &binaries.front(), + binary_statuses.empty( ) ? 
NULL : &binary_statuses.front(), + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithBinary", status_code); + + /* + for (int i = 0; i < num_devices; ++i) + printf("%d:%d\n", i, binary_statuses[i]); + */ + + try + { + return new program(result, false, program::KND_BINARY); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } + + + +#if (PYOPENCL_CL_VERSION >= 0x1020) && \ + ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) + inline + program *create_program_with_built_in_kernels( + context &ctx, + py::object py_devices, + std::string const &kernel_names) + { + PYOPENCL_PARSE_PY_DEVICES; + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels"); + cl_program result = clCreateProgramWithBuiltInKernels( + ctx.data(), num_devices, devices, + kernel_names.c_str(), &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code); + + try + { + return new program(result, false); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } +#endif + + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + program *link_program( + context &ctx, + py::object py_programs, + std::string const &options, + py::object py_devices + ) + { + PYOPENCL_PARSE_PY_DEVICES; + + std::vector programs; + PYTHON_FOREACH(py_prg, py_programs) + { + program &prg = py::extract(py_prg); + programs.push_back(prg.data()); + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clLinkProgram"); + cl_program result = clLinkProgram( + ctx.data(), num_devices, devices, + options.c_str(), + programs.size(), + programs.empty() ? NULL : &programs.front(), + 0, 0, + &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("clLinkPorgram", status_code); + + try + { + return new program(result, false); + } + catch (...) 
+ { + clReleaseProgram(result); + throw; + } + } + +#endif + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + void unload_platform_compiler(platform &plat) + { + PYOPENCL_CALL_GUARDED(clUnloadPlatformCompiler, (plat.data())); + } +#endif + + // }}} + + // {{{ kernel + class local_memory + { + private: + size_t m_size; + + public: + local_memory(size_t size) + : m_size(size) + { } + + size_t size() const + { return m_size; } + }; + + + + + class kernel : boost::noncopyable + { + private: + cl_kernel m_kernel; + + public: + kernel(cl_kernel knl, bool retain) + : m_kernel(knl) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainKernel, (knl)); + } + + kernel(program const &prg, std::string const &kernel_name) + { + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateKernel"); + m_kernel = clCreateKernel(prg.data(), kernel_name.c_str(), + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateKernel", status_code); + } + + ~kernel() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel)); + } + + cl_kernel data() const + { + return m_kernel; + } + + PYOPENCL_EQUALITY_TESTS(kernel); + + void set_arg_null(cl_uint arg_index) + { + cl_mem m = 0; + PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, + sizeof(cl_mem), &m)); + } + + void set_arg_mem(cl_uint arg_index, memory_object_holder &moh) + { + cl_mem m = moh.data(); + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, sizeof(cl_mem), &m)); + } + + void set_arg_local(cl_uint arg_index, local_memory const &loc) + { + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, loc.size(), 0)); + } + + void set_arg_sampler(cl_uint arg_index, sampler const &smp) + { + cl_sampler s = smp.data(); + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, sizeof(cl_sampler), &s)); + } + + void set_arg_buf(cl_uint arg_index, py::object py_buffer) + { + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + py_buffer_wrapper 
buf_wrapper; + + try + { + buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + } + catch (py::error_already_set) + { + PyErr_Clear(); + throw error("Kernel.set_arg", CL_INVALID_VALUE, + "invalid kernel argument"); + } + + buf = buf_wrapper.m_buf.buf; + len = buf_wrapper.m_buf.len; +#else + if (PyObject_AsReadBuffer(py_buffer.ptr(), &buf, &len)) + { + PyErr_Clear(); + throw error("Kernel.set_arg", CL_INVALID_VALUE, + "invalid kernel argument"); + } +#endif + + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, len, buf)); + } + + void set_arg(cl_uint arg_index, py::object arg) + { + if (arg.ptr() == Py_None) + { + set_arg_null(arg_index); + return; + } + + py::extract ex_mo(arg); + if (ex_mo.check()) + { + set_arg_mem(arg_index, ex_mo()); + return; + } + + py::extract ex_loc(arg); + if (ex_loc.check()) + { + set_arg_local(arg_index, ex_loc()); + return; + } + + py::extract ex_smp(arg); + if (ex_smp.check()) + { + set_arg_sampler(arg_index, ex_smp()); + return; + } + + set_arg_buf(arg_index, arg); + } + + py::object get_info(cl_kernel_info param_name) const + { + switch (param_name) + { + case CL_KERNEL_FUNCTION_NAME: + PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); + case CL_KERNEL_NUM_ARGS: + case CL_KERNEL_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name, + cl_uint); + case CL_KERNEL_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, + cl_context, context); + case CL_KERNEL_PROGRAM: + PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, + cl_program, program); +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_KERNEL_ATTRIBUTES: + PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); +#endif + default: + throw error("Kernel.get_info", CL_INVALID_VALUE); + } + } + + py::object get_work_group_info( + cl_kernel_work_group_info param_name, + device const &dev + ) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack + case CL_KERNEL_WORK_GROUP_SIZE: + 
PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + size_t); + case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, result); + + PYOPENCL_RETURN_VECTOR(size_t, result); + } + case CL_KERNEL_LOCAL_MEM_SIZE: +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_KERNEL_PRIVATE_MEM_SIZE: +#endif + PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + cl_ulong); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + size_t); +#endif + default: + throw error("Kernel.get_work_group_info", CL_INVALID_VALUE); +#undef PYOPENCL_FIRST_ARG + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::object get_arg_info( + cl_uint arg_index, + cl_kernel_arg_info param_name + ) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack + case CL_KERNEL_ARG_ADDRESS_QUALIFIER: + PYOPENCL_GET_INTEGRAL_INFO(KernelArg, + PYOPENCL_FIRST_ARG, param_name, + cl_kernel_arg_address_qualifier); + + case CL_KERNEL_ARG_ACCESS_QUALIFIER: + PYOPENCL_GET_INTEGRAL_INFO(KernelArg, + PYOPENCL_FIRST_ARG, param_name, + cl_kernel_arg_access_qualifier); + + case CL_KERNEL_ARG_TYPE_NAME: + case CL_KERNEL_ARG_NAME: + PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name); +#undef PYOPENCL_FIRST_ARG + default: + throw error("Kernel.get_arg_info", CL_INVALID_VALUE); + } + } +#endif + }; + + + inline + py::list create_kernels_in_program(program &pgm) + { + cl_uint num_kernels; + PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( + pgm.data(), 0, 0, &num_kernels)); + + std::vector kernels(num_kernels); + PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( + pgm.data(), num_kernels, + kernels.empty( ) ? 
NULL : &kernels.front(), &num_kernels)); + + py::list result; + BOOST_FOREACH(cl_kernel knl, kernels) + result.append(handle_from_new_ptr(new kernel(knl, true))); + + return result; + } + + + + inline + event *enqueue_nd_range_kernel( + command_queue &cq, + kernel &knl, + py::object py_global_work_size, + py::object py_local_work_size, + py::object py_global_work_offset, + py::object py_wait_for, + bool g_times_l) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_uint work_dim = len(py_global_work_size); + + std::vector global_work_size; + COPY_PY_LIST(size_t, global_work_size); + + size_t *local_work_size_ptr = 0; + std::vector local_work_size; + if (py_local_work_size.ptr() != Py_None) + { + if (g_times_l) + work_dim = std::max(work_dim, unsigned(len(py_local_work_size))); + else + if (work_dim != unsigned(len(py_local_work_size))) + throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, + "global/local work sizes have differing dimensions"); + + COPY_PY_LIST(size_t, local_work_size); + + while (local_work_size.size() < work_dim) + local_work_size.push_back(1); + while (global_work_size.size() < work_dim) + global_work_size.push_back(1); + + local_work_size_ptr = local_work_size.empty( ) ? NULL : &local_work_size.front(); + } + + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_size[work_axis] *= local_work_size[work_axis]; + } + + size_t *global_work_offset_ptr = 0; + std::vector global_work_offset; + if (py_global_work_offset.ptr() != Py_None) + { + if (work_dim != unsigned(len(py_global_work_offset))) + throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, + "global work size and offset have differing dimensions"); + + COPY_PY_LIST(size_t, global_work_offset); + + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_offset[work_axis] *= local_work_size[work_axis]; + } + + global_work_offset_ptr = global_work_offset.empty( ) ? 
NULL : &global_work_offset.front(); + } + + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, ( + cq.data(), + knl.data(), + work_dim, + global_work_offset_ptr, + global_work_size.empty( ) ? NULL : &global_work_size.front(), + local_work_size_ptr, + PYOPENCL_WAITLIST_ARGS, &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + } ); + } + + + + + + + inline + event *enqueue_task( + command_queue &cq, + kernel &knl, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueTask, ( + cq.data(), + knl.data(), + PYOPENCL_WAITLIST_ARGS, &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + } ); + } + + // }}} + + // {{{ gl interop + inline + bool have_gl() + { +#ifdef HAVE_GL + return true; +#else + return false; +#endif + } + + + + +#ifdef HAVE_GL + +#ifdef __APPLE__ + inline + cl_context_properties get_apple_cgl_share_group() + { + CGLContextObj kCGLContext = CGLGetCurrentContext(); + CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); + + return (cl_context_properties) kCGLShareGroup; + } +#endif /* __APPLE__ */ + + + + + class gl_buffer : public memory_object + { + public: + gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, hostbuf) + { } + }; + + + + + class gl_renderbuffer : public memory_object + { + public: + gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, hostbuf) + { } + }; + + + + + class gl_texture : public image + { + public: + gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : image(mem, retain, hostbuf) + { } + + py::object get_gl_texture_info(cl_gl_texture_info param_name) + { + switch (param_name) + { + case CL_GL_TEXTURE_TARGET: + PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum); + case CL_GL_MIPMAP_LEVEL: + PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint); + + 
default: + throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); + } + } + }; + + + + +#define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \ + inline \ + TYPE *NAME ARGS \ + { \ + cl_int status_code; \ + PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \ + cl_mem mem = CL_NAME CL_ARGS; \ + \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#CL_NAME, status_code); \ + \ + try \ + { \ + return new TYPE(mem, false); \ + } \ + catch (...) \ + { \ + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \ + throw; \ + } \ + } + + + + + PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer, + create_from_gl_buffer, clCreateFromGLBuffer, + (context &ctx, cl_mem_flags flags, GLuint bufobj), + (ctx.data(), flags, bufobj, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, + create_from_gl_texture_2d, clCreateFromGLTexture2D, + (context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, GLuint texture), + (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, + create_from_gl_texture_3d, clCreateFromGLTexture3D, + (context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, GLuint texture), + (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer, + create_from_gl_renderbuffer, clCreateFromGLRenderbuffer, + (context &ctx, cl_mem_flags flags, GLuint renderbuffer), + (ctx.data(), flags, renderbuffer, &status_code)); + + inline + gl_texture *create_from_gl_texture( + context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, + GLuint texture, unsigned dims) + { + if (dims == 2) + return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture); + else if (dims == 3) + return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture); + else + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid dimension"); + } + + + + + + inline + py::tuple 
get_gl_object_info(memory_object_holder const &mem) + { + cl_gl_object_type otype; + GLuint gl_name; + PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name)); + return py::make_tuple(otype, gl_name); + } + +#define WRAP_GL_ENQUEUE(what, What) \ + inline \ + event *enqueue_##what##_gl_objects( \ + command_queue &cq, \ + py::object py_mem_objects, \ + py::object py_wait_for) \ + { \ + PYOPENCL_PARSE_WAIT_FOR; \ + \ + std::vector mem_objects; \ + PYTHON_FOREACH(mo, py_mem_objects) \ + mem_objects.push_back(py::extract(mo)().data()); \ + \ + cl_event evt; \ + PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ + cq.data(), \ + mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), \ + PYOPENCL_WAITLIST_ARGS, &evt \ + )); \ + \ + PYOPENCL_RETURN_NEW_EVENT(evt); \ + } + + WRAP_GL_ENQUEUE(acquire, Acquire); + WRAP_GL_ENQUEUE(release, Release); +#endif + + + + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + inline + py::object get_gl_context_info_khr( + py::object py_properties, + cl_gl_context_info param_name, + py::object py_platform + ) + { + std::vector props + = parse_context_properties(py_properties); + + typedef CL_API_ENTRY cl_int (CL_API_CALL + *func_ptr_type)(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + + func_ptr_type func_ptr; + +#if PYOPENCL_CL_VERSION >= 0x1020 + if (py_platform.ptr() != Py_None) + { + platform &plat = py::extract(py_platform); + + func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform( + plat.data(), "clGetGLContextInfoKHR"); + } + else + { + PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", ); + + func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( + "clGetGLContextInfoKHR"); + } +#else + func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( + "clGetGLContextInfoKHR"); 
+#endif + + + if (!func_ptr) + throw error("Context.get_info", CL_INVALID_PLATFORM, + "clGetGLContextInfoKHR extension function not present"); + + cl_context_properties *props_ptr + = props.empty( ) ? NULL : &props.front(); + + switch (param_name) + { + case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: + { + cl_device_id param_value; + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, sizeof(param_value), &param_value, 0)); + return py::object(handle_from_new_ptr( \ + new device(param_value, /*retain*/ true))); + } + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: + { + size_t size; + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, 0, 0, &size)); + + std::vector devices; + + devices.resize(size / sizeof(devices.front())); + + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, size, + devices.empty( ) ? NULL : &devices.front(), &size)); + + py::list result; + BOOST_FOREACH(cl_device_id did, devices) + result.append(handle_from_new_ptr( + new device(did))); + + return result; + } + + default: + throw error("get_gl_context_info_khr", CL_INVALID_VALUE); + } + } + +#endif + + // }}} + + // {{{ deferred implementation bits + + inline py::object create_mem_object_wrapper(cl_mem mem) + { + cl_mem_object_type mem_obj_type; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ + (mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0)); + + switch (mem_obj_type) + { + case CL_MEM_OBJECT_BUFFER: + return py::object(handle_from_new_ptr( + new buffer(mem, /*retain*/ true))); + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D_BUFFER: +#endif + return py::object(handle_from_new_ptr( + new image(mem, /*retain*/ true))); + default: + return py::object(handle_from_new_ptr( + new memory_object(mem, /*retain*/ true))); + } + } + + inline + py::object memory_object_from_int(intptr_t cl_mem_as_int) + { + return 
create_mem_object_wrapper((cl_mem) cl_mem_as_int); + } + + + inline + py::object memory_object_holder::get_info(cl_mem_info param_name) const + { + switch (param_name) + { + case CL_MEM_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_mem_object_type); + case CL_MEM_FLAGS: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_mem_flags); + case CL_MEM_SIZE: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + size_t); + case CL_MEM_HOST_PTR: + throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE, + "Use MemoryObject.get_host_array to get host pointer."); + case CL_MEM_MAP_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_uint); + case CL_MEM_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_uint); + case CL_MEM_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name, + cl_context, context); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_MEM_ASSOCIATED_MEMOBJECT: + { + cl_mem param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ + (data(), param_name, sizeof(param_value), &param_value, 0)); + if (param_value == 0) + { + // no associated memory object? no problem. 
+ return py::object(); + } + + return create_mem_object_wrapper(param_value); + } + case CL_MEM_OFFSET: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + size_t); +#endif + + default: + throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE); + } + } + + inline + py::handle<> get_mem_obj_host_array( + py::object mem_obj_py, + py::object shape, py::object dtype, + py::object order_py) + { + memory_object_holder const &mem_obj = + py::extract(mem_obj_py); + PyArray_Descr *tp_descr; + if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) + throw py::error_already_set(); + cl_mem_flags mem_flags; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0)); + if (!(mem_flags & CL_MEM_USE_HOST_PTR)) + throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE, + "Only MemoryObject with USE_HOST_PTR " + "is supported."); + + py::extract shape_as_int(shape); + std::vector dims; + + if (shape_as_int.check()) + dims.push_back(shape_as_int()); + else + std::copy( + py::stl_input_iterator(shape), + py::stl_input_iterator(), + back_inserter(dims)); + + NPY_ORDER order = PyArray_CORDER; + PyArray_OrderConverter(order_py.ptr(), &order); + + int ary_flags = 0; + if (order == PyArray_FORTRANORDER) + ary_flags |= NPY_FARRAY; + else if (order == PyArray_CORDER) + ary_flags |= NPY_CARRAY; + else + throw std::runtime_error("unrecognized order specifier"); + + void *host_ptr; + size_t mem_obj_size; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr), + &host_ptr, 0)); + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size), + &mem_obj_size, 0)); + + py::handle<> result = py::handle<>(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + dims.size(), &dims.front(), /*strides*/ NULL, + host_ptr, ary_flags, /*obj*/NULL)); + + if ((size_t) PyArray_NBYTES(result.get()) > mem_obj_size) + throw 
pyopencl::error("MemoryObject.get_host_array", + CL_INVALID_VALUE, + "Resulting array is larger than memory object."); + + PyArray_BASE(result.get()) = mem_obj_py.ptr(); + Py_INCREF(mem_obj_py.ptr()); + + return result; + } + + // }}} + +} + + + + +#endif + +// vim: foldmethod=marker diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp new file mode 100644 index 00000000..f3448aca --- /dev/null +++ b/src/wrap_cl_part_1.cpp @@ -0,0 +1,312 @@ +#include "wrap_cl.hpp" + + + + +using namespace pyopencl; + + + + +void pyopencl_expose_part_1() +{ + py::docstring_options doc_op; + doc_op.disable_cpp_signatures(); + + py::def("get_cl_header_version", get_cl_header_version); + + // {{{ platform + DEF_SIMPLE_FUNCTION(get_platforms); + + { + typedef platform cls; + py::class_("Platform", py::no_init) + .DEF_SIMPLE_METHOD(get_info) + .def("get_devices", &cls::get_devices, + py::arg("device_type")=CL_DEVICE_TYPE_ALL) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_platform_id) + ; + } + + // }}} + + // {{{ device + { + typedef device cls; + py::class_("Device", py::no_init) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + .DEF_SIMPLE_METHOD(create_sub_devices_ext) +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + .DEF_SIMPLE_METHOD(create_sub_devices) +#endif + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_device_id) + ; + } + + // }}} + + // {{{ context + + { + typedef context cls; + py::class_ >("Context", py::no_init) + .def("__init__", make_constructor(create_context, + py::default_call_policies(), + (py::arg("devices")=py::object(), + py::arg("properties")=py::object(), + py::arg("dev_type")=py::object() + ))) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + 
PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_context) + ; + } + + // }}} + + // {{{ command queue + { + typedef command_queue cls; + py::class_("CommandQueue", + py::init + ((py::arg("context"), py::arg("device")=py::object(), py::arg("properties")=0))) + .DEF_SIMPLE_METHOD(get_info) +#if PYOPENCL_CL_VERSION < 0x1010 + .DEF_SIMPLE_METHOD(set_property) +#endif + .DEF_SIMPLE_METHOD(flush) + .DEF_SIMPLE_METHOD(finish) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_command_queue) + ; + } + + // }}} + + // {{{ events/synchronization + { + typedef event cls; + py::class_("Event", py::no_init) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_profiling_info) + .DEF_SIMPLE_METHOD(wait) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_event) + + // deprecated, remove in 2015.x. + .def("from_cl_event_as_int", from_int_ptr, + py::return_value_policy()) + .staticmethod("from_cl_event_as_int") + ; + } + { + typedef nanny_event cls; + py::class_ >("NannyEvent", py::no_init) + .DEF_SIMPLE_METHOD(get_ward) + ; + } + + DEF_SIMPLE_FUNCTION(wait_for_events); + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, + (py::arg("queue"), py::arg("wait_for")=py::object()), + py::return_value_policy()); +#endif + py::def("_enqueue_marker", enqueue_marker, + (py::arg("queue")), + py::return_value_policy()); + py::def("_enqueue_wait_for_events", enqueue_wait_for_events, + (py::arg("queue"), py::arg("wait_for")=py::object())); + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, + (py::arg("queue"), py::arg("wait_for")=py::object()), + py::return_value_policy()); +#endif + py::def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); + +#if PYOPENCL_CL_VERSION >= 0x1010 + { + typedef user_event cls; + py::class_, 
boost::noncopyable>("UserEvent", py::no_init) + .def("__init__", make_constructor( + create_user_event, py::default_call_policies(), py::args("context"))) + .DEF_SIMPLE_METHOD(set_status) + ; + } +#endif + + // }}} + + // {{{ memory_object + + { + typedef memory_object_holder cls; + py::class_( + "MemoryObjectHolder", py::no_init) + .DEF_SIMPLE_METHOD(get_info) + .def("get_host_array", get_mem_obj_host_array, + (py::arg("shape"), py::arg("dtype"), py::arg("order")="C")) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + + .add_property("int_ptr", to_int_ptr, + "Return an integer corresponding to the pointer value " + "of the underlying :c:type:`cl_mem`. " + "Use :meth:`from_int_ptr` to turn back into a Python object." + "\n\n.. versionadded:: 2013.2\n") + ; + } + { + typedef memory_object cls; + py::class_ >( + "MemoryObject", py::no_init) + .DEF_SIMPLE_METHOD(release) + .add_property("hostbuf", &cls::hostbuf) + + .def("from_int_ptr", memory_object_from_int, + "(static method) Return a new Python object referencing the C-level " \ + ":c:type:`cl_mem` object at the location pointed to " \ + "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ + "will be called." \ + "\n\n.. 
versionadded:: 2013.2\n") \ + .staticmethod("from_int_ptr") + + // deprecated, remove in 2015.x + .def("from_cl_mem_as_int", memory_object_from_int) + .staticmethod("from_cl_mem_as_int") + ; + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("enqueue_migrate_mem_objects", enqueue_migrate_mem_objects, + (py::args("queue", "mem_objects"), + py::arg("flags")=0, + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); +#endif + +#ifdef cl_ext_migrate_memobject + py::def("enqueue_migrate_mem_object_ext", enqueue_migrate_mem_object_ext, + (py::args("queue", "mem_objects"), + py::arg("flags")=0, + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); +#endif + // }}} + + // {{{ buffer + { + typedef buffer cls; + py::class_, boost::noncopyable>( + "Buffer", py::no_init) + .def("__init__", make_constructor(create_buffer_py, + py::default_call_policies(), + (py::args("context", "flags"), + py::arg("size")=0, + py::arg("hostbuf")=py::object() + ))) +#if PYOPENCL_CL_VERSION >= 0x1010 + .def("get_sub_region", &cls::get_sub_region, + (py::args("origin", "size"), py::arg("flags")=0), + py::return_value_policy()) + .def("__getitem__", &cls::getitem, + py::return_value_policy()) +#endif + ; + } + + // }}} + + // {{{ transfers + + // {{{ byte-for-byte + py::def("_enqueue_read_buffer", enqueue_read_buffer, + (py::args("queue", "mem", "hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + py::def("_enqueue_write_buffer", enqueue_write_buffer, + (py::args("queue", "mem", "hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + py::def("_enqueue_copy_buffer", enqueue_copy_buffer, + (py::args("queue", "src", "dst"), + py::arg("byte_count")=-1, + py::arg("src_offset")=0, + py::arg("dst_offset")=0, + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); + + // }}} + + // {{{ 
rectangular + +#if PYOPENCL_CL_VERSION >= 0x1010 + py::def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, + (py::args("queue", "mem", "hostbuf", + "buffer_origin", "host_origin", "region"), + py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + py::def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, + (py::args("queue", "mem", "hostbuf", + "buffer_origin", "host_origin", "region"), + py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + py::def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, + (py::args("queue", "src", "dst", + "src_origin", "dst_origin", "region"), + py::arg("src_pitches")=py::object(), + py::arg("dst_pitches")=py::object(), + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); +#endif + + // }}} + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("_enqueue_fill_buffer", enqueue_fill_buffer, + (py::args("queue", "mem", "pattern", "offset", "size"), + py::arg("wait_for")=py::object()), + py::return_value_policy()); +#endif +} + +// vim: foldmethod=marker diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp new file mode 100644 index 00000000..4d010796 --- /dev/null +++ b/src/wrap_cl_part_2.cpp @@ -0,0 +1,359 @@ +#include "wrap_cl.hpp" + + + + +namespace pyopencl { +#if PYOPENCL_CL_VERSION >= 0x1020 + py::object image_desc_dummy_getter(cl_image_desc &desc) + { + return py::object(); + } + + void image_desc_set_shape(cl_image_desc &desc, py::object py_shape) + { + COPY_PY_REGION_TRIPLE(shape); + desc.image_width = shape[0]; + desc.image_height = shape[1]; + desc.image_depth = shape[2]; + desc.image_array_size = shape[2]; + } + + void image_desc_set_pitches(cl_image_desc &desc, py::object py_pitches) + { + COPY_PY_PITCH_TUPLE(pitches); + 
desc.image_row_pitch = pitches[0]; + desc.image_slice_pitch = pitches[1]; + } + + void image_desc_set_buffer(cl_image_desc &desc, memory_object *mobj) + { + if (mobj) + desc.buffer = mobj->data(); + else + desc.buffer = 0; + } + +#endif +} + + + + +using namespace pyopencl; + + + + +void pyopencl_expose_part_2() +{ + py::docstring_options doc_op; + doc_op.disable_cpp_signatures(); + + // {{{ image + +#if PYOPENCL_CL_VERSION >= 0x1020 + { + typedef cl_image_desc cls; + py::class_("ImageDescriptor") + .def_readwrite("image_type", &cls::image_type) + .add_property("shape", &image_desc_dummy_getter, image_desc_set_shape) + .def_readwrite("array_size", &cls::image_array_size) + .add_property("pitches", &image_desc_dummy_getter, image_desc_set_pitches) + .def_readwrite("num_mip_levels", &cls::num_mip_levels) + .def_readwrite("num_samples", &cls::num_samples) + .add_property("buffer", &image_desc_dummy_getter, image_desc_set_buffer) + ; + } +#endif + + { + typedef image cls; + py::class_, boost::noncopyable>( + "Image", py::no_init) + .def("__init__", make_constructor(create_image, + py::default_call_policies(), + (py::args("context", "flags", "format"), + py::arg("shape")=py::object(), + py::arg("pitches")=py::object(), + py::arg("hostbuf")=py::object() + ))) +#if PYOPENCL_CL_VERSION >= 0x1020 + .def("__init__", make_constructor(create_image_from_desc, + py::default_call_policies(), + (py::args("context", "flags", "format", "desc"), + py::arg("hostbuf")=py::object()))) +#endif + .DEF_SIMPLE_METHOD(get_image_info) + ; + } + + { + typedef cl_image_format cls; + py::class_("ImageFormat") + .def("__init__", py::make_constructor(make_image_format)) + .def_readwrite("channel_order", &cls::image_channel_order) + .def_readwrite("channel_data_type", &cls::image_channel_data_type) + .add_property("channel_count", &get_image_format_channel_count) + .add_property("dtype_size", &get_image_format_channel_dtype_size) + .add_property("itemsize", &get_image_format_item_size) + ; + } + + 
DEF_SIMPLE_FUNCTION(get_supported_image_formats); + + py::def("_enqueue_read_image", enqueue_read_image, + (py::args("queue", "mem", "origin", "region", "hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + py::def("_enqueue_write_image", enqueue_write_image, + (py::args("queue", "mem", "origin", "region", "hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ), + py::return_value_policy()); + + py::def("_enqueue_copy_image", enqueue_copy_image, + (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"), + py::arg("wait_for")=py::object()), + py::return_value_policy()); + py::def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, + (py::args("queue", "src", "dest", "origin", "region", "offset"), + py::arg("wait_for")=py::object()), + py::return_value_policy()); + py::def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, + (py::args("queue", "src", "dest", "offset", "origin", "region"), + py::arg("wait_for")=py::object()), + py::return_value_policy()); + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("enqueue_fill_image", enqueue_write_image, + (py::args("queue", "mem", "color", "origin", "region"), + py::arg("wait_for")=py::object()), + py::return_value_policy()); +#endif + + // }}} + + // {{{ memory_map + { + typedef memory_map cls; + py::class_("MemoryMap", py::no_init) + .def("release", &cls::release, + (py::arg("queue")=0, py::arg("wait_for")=py::object()), + py::return_value_policy()) + ; + } + + py::def("enqueue_map_buffer", enqueue_map_buffer, + (py::args("queue", "buf", "flags", + "offset", + "shape", "dtype"), + py::arg("order")="C", + py::arg("strides")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true)); + py::def("enqueue_map_image", enqueue_map_image, + (py::args("queue", "img", "flags", + 
"origin", "region", + "shape", "dtype"), + py::arg("order")="C", + py::arg("strides")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true)); + + // }}} + + // {{{ sampler + { + typedef sampler cls; + py::class_("Sampler", + py::init()) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_sampler) + ; + } + + // }}} + + // {{{ program + { + typedef program cls; + py::enum_("program_kind") + .value("UNKNOWN", cls::KND_UNKNOWN) + .value("SOURCE", cls::KND_SOURCE) + .value("BINARY", cls::KND_BINARY) + ; + + py::class_("_Program", py::no_init) + .def("__init__", make_constructor( + create_program_with_source, + py::default_call_policies(), + py::args("context", "src"))) + .def("__init__", make_constructor( + create_program_with_binary, + py::default_call_policies(), + py::args("context", "devices", "binaries"))) +#if (PYOPENCL_CL_VERSION >= 0x1020) && \ + ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) + .def("create_with_built_in_kernels", + create_program_with_built_in_kernels, + py::args("context", "devices", "kernel_names"), + py::return_value_policy()) + .staticmethod("create_with_built_in_kernels") +#endif + .DEF_SIMPLE_METHOD(kind) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_build_info) + .def("_build", &cls::build, + (py::arg("options")="", py::arg("devices")=py::object())) +#if PYOPENCL_CL_VERSION >= 0x1020 + .def("compile", &cls::compile, + (py::arg("options")="", py::arg("devices")=py::object(), + py::arg("headers")=py::list())) + .def("link", &link_program, + (py::arg("context"), + py::arg("programs"), + py::arg("options")="", + py::arg("devices")=py::object()), + py::return_value_policy()) + .staticmethod("link") +#endif + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + .def("all_kernels", create_kernels_in_program) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_program) + ; + 
} + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::def("unload_platform_compiler", unload_platform_compiler); +#endif + + // }}} + + // {{{ kernel + + { + typedef kernel cls; + py::class_("Kernel", + py::init()) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_work_group_info) + .DEF_SIMPLE_METHOD(set_arg) +#if PYOPENCL_CL_VERSION >= 0x1020 + .DEF_SIMPLE_METHOD(get_arg_info) +#endif + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_kernel) + ; + } + + { + typedef local_memory cls; + py::class_("LocalMemory", + py::init(py::arg("size"))) + .add_property("size", &cls::size) + ; + } + + + py::def("enqueue_nd_range_kernel", enqueue_nd_range_kernel, + (py::args("queue", "kernel"), + py::arg("global_work_size"), + py::arg("local_work_size"), + py::arg("global_work_offset")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("g_times_l")=false + ), + py::return_value_policy()); + py::def("enqueue_task", enqueue_task, + (py::args("queue", "kernel"), + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); + + // TODO: clEnqueueNativeKernel + // }}} + + // {{{ GL interop + DEF_SIMPLE_FUNCTION(have_gl); + +#ifdef HAVE_GL + +#ifdef __APPLE__ + DEF_SIMPLE_FUNCTION(get_apple_cgl_share_group); +#endif /* __APPLE__ */ + + { + typedef gl_buffer cls; + py::class_, boost::noncopyable>( + "GLBuffer", py::no_init) + .def("__init__", make_constructor(create_from_gl_buffer, + py::default_call_policies(), + (py::args("context", "flags", "bufobj")))) + .def("get_gl_object_info", get_gl_object_info) + ; + } + + { + typedef gl_renderbuffer cls; + py::class_, boost::noncopyable>( + "GLRenderBuffer", py::no_init) + .def("__init__", make_constructor(create_from_gl_renderbuffer, + py::default_call_policies(), + (py::args("context", "flags", "bufobj")))) + .def("get_gl_object_info", get_gl_object_info) + ; + } + + { + typedef gl_texture cls; + py::class_, boost::noncopyable>( + "GLTexture", 
py::no_init) + .def("__init__", make_constructor(create_from_gl_texture, + py::default_call_policies(), + (py::args("context", "flags", + "texture_target", "miplevel", + "texture", "dims")))) + .def("get_gl_object_info", get_gl_object_info) + .DEF_SIMPLE_METHOD(get_gl_texture_info) + ; + } + + py::def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, + (py::args("queue", "mem_objects"), + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); + py::def("enqueue_release_gl_objects", enqueue_release_gl_objects, + (py::args("queue", "mem_objects"), + py::arg("wait_for")=py::object() + ), + py::return_value_policy()); + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + py::def("get_gl_context_info_khr", get_gl_context_info_khr, + (py::args("properties", "param_name"), py::arg("platform")=py::object())); +#endif + +#endif + // }}} +} + + + + +// vim: foldmethod=marker diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp new file mode 100644 index 00000000..64511d01 --- /dev/null +++ b/src/wrap_constants.cpp @@ -0,0 +1,868 @@ +#include "wrap_cl.hpp" + + + + +using namespace pyopencl; + + + + +namespace +{ + py::handle<> + CLError, + CLMemoryError, + CLLogicError, + CLRuntimeError; + + + + + void translate_cl_error(const error &err) + { + if (err.code() == CL_MEM_OBJECT_ALLOCATION_FAILURE) + PyErr_SetObject(CLMemoryError.get(), py::object(err).ptr()); + else if (err.code() <= CL_INVALID_VALUE) + PyErr_SetObject(CLLogicError.get(), py::object(err).ptr()); + else if (err.code() > CL_INVALID_VALUE && err.code() < CL_SUCCESS) + PyErr_SetObject(CLRuntimeError.get(), py::object(err).ptr()); + else + PyErr_SetObject(CLError.get(), py::object(err).ptr()); + } + + + + + // {{{ 'fake' constant scopes + class status_code { }; + class platform_info { }; + class device_type { }; + class device_info { }; + class device_fp_config { }; + class device_mem_cache_type { }; + class device_local_mem_type { }; + class device_exec_capabilities { }; + 
class command_queue_properties { }; + class context_info { }; + class gl_context_info { }; + class context_properties { }; + class command_queue_info { }; + class mem_flags { }; + class channel_order { }; + class channel_type { }; + class mem_object_type { }; + class mem_info { }; + class image_info { }; + class addressing_mode { }; + class filter_mode { }; + class sampler_info { }; + class map_flags { }; + class program_info { }; + class program_build_info { }; + class program_binary_type { }; + class build_status { }; + class kernel_info { }; + class kernel_arg_info { }; + class kernel_arg_address_qualifier { }; + class kernel_arg_access_qualifier { }; + class kernel_work_group_info { }; + class event_info { }; + class command_type { }; + class command_execution_status { }; + class profiling_info { }; + class buffer_create_type { }; + class mem_migration_flags { }; + + class device_partition_property { }; + class device_affinity_domain { }; + + class device_partition_property_ext { }; + class affinity_domain_ext { }; + + class gl_object_type { }; + class gl_texture_info { }; + + class migrate_mem_object_flags_ext {}; + // }}} +} + + + + +void pyopencl_expose_constants() +{ + // {{{ exceptions +#define DECLARE_EXC(NAME, BASE) \ + CL##NAME = py::handle<>(PyErr_NewException("pyopencl." 
#NAME, BASE, NULL)); \ + py::scope().attr(#NAME) = CL##NAME; + + { + DECLARE_EXC(Error, NULL); + DECLARE_EXC(MemoryError, CLError.get()); + DECLARE_EXC(LogicError, CLError.get()); + DECLARE_EXC(RuntimeError, CLError.get()); + + py::register_exception_translator(translate_cl_error); + } + // }}} + + // {{{ constants +#define ADD_ATTR(PREFIX, NAME) \ + cls.attr(#NAME) = CL_##PREFIX##NAME +#define ADD_ATTR_SUFFIX(PREFIX, NAME, SUFFIX) \ + cls.attr(#NAME) = CL_##PREFIX##NAME##SUFFIX + + { + typedef error cls; + py::class_ ("_error", py::no_init) + .DEF_SIMPLE_METHOD(routine) + .DEF_SIMPLE_METHOD(code) + .DEF_SIMPLE_METHOD(what) + ; + } + + { + py::class_ cls("status_code", py::no_init); + + ADD_ATTR(, SUCCESS); + ADD_ATTR(, DEVICE_NOT_FOUND); + ADD_ATTR(, DEVICE_NOT_AVAILABLE); +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + ADD_ATTR(, COMPILER_NOT_AVAILABLE); +#endif + ADD_ATTR(, MEM_OBJECT_ALLOCATION_FAILURE); + ADD_ATTR(, OUT_OF_RESOURCES); + ADD_ATTR(, OUT_OF_HOST_MEMORY); + ADD_ATTR(, PROFILING_INFO_NOT_AVAILABLE); + ADD_ATTR(, MEM_COPY_OVERLAP); + ADD_ATTR(, IMAGE_FORMAT_MISMATCH); + ADD_ATTR(, IMAGE_FORMAT_NOT_SUPPORTED); + ADD_ATTR(, BUILD_PROGRAM_FAILURE); + ADD_ATTR(, MAP_FAILURE); + + ADD_ATTR(, INVALID_VALUE); + ADD_ATTR(, INVALID_DEVICE_TYPE); + ADD_ATTR(, INVALID_PLATFORM); + ADD_ATTR(, INVALID_DEVICE); + ADD_ATTR(, INVALID_CONTEXT); + ADD_ATTR(, INVALID_QUEUE_PROPERTIES); + ADD_ATTR(, INVALID_COMMAND_QUEUE); + ADD_ATTR(, INVALID_HOST_PTR); + ADD_ATTR(, INVALID_MEM_OBJECT); + ADD_ATTR(, INVALID_IMAGE_FORMAT_DESCRIPTOR); + ADD_ATTR(, INVALID_IMAGE_SIZE); + ADD_ATTR(, INVALID_SAMPLER); + ADD_ATTR(, INVALID_BINARY); + ADD_ATTR(, INVALID_BUILD_OPTIONS); + ADD_ATTR(, INVALID_PROGRAM); + ADD_ATTR(, INVALID_PROGRAM_EXECUTABLE); + ADD_ATTR(, INVALID_KERNEL_NAME); + ADD_ATTR(, INVALID_KERNEL_DEFINITION); + ADD_ATTR(, INVALID_KERNEL); + ADD_ATTR(, INVALID_ARG_INDEX); + ADD_ATTR(, INVALID_ARG_VALUE); + ADD_ATTR(, INVALID_ARG_SIZE); + ADD_ATTR(, 
INVALID_KERNEL_ARGS); + ADD_ATTR(, INVALID_WORK_DIMENSION); + ADD_ATTR(, INVALID_WORK_GROUP_SIZE); + ADD_ATTR(, INVALID_WORK_ITEM_SIZE); + ADD_ATTR(, INVALID_GLOBAL_OFFSET); + ADD_ATTR(, INVALID_EVENT_WAIT_LIST); + ADD_ATTR(, INVALID_EVENT); + ADD_ATTR(, INVALID_OPERATION); + ADD_ATTR(, INVALID_GL_OBJECT); + ADD_ATTR(, INVALID_BUFFER_SIZE); + ADD_ATTR(, INVALID_MIP_LEVEL); + +#if defined(cl_khr_icd) && (cl_khr_icd >= 1) + ADD_ATTR(, PLATFORM_NOT_FOUND_KHR); +#endif + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR(, INVALID_GL_SHAREGROUP_REFERENCE_KHR); +#endif + +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(, MISALIGNED_SUB_BUFFER_OFFSET); + ADD_ATTR(, EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); + ADD_ATTR(, INVALID_GLOBAL_WORK_SIZE); +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(, COMPILE_PROGRAM_FAILURE); + ADD_ATTR(, LINKER_NOT_AVAILABLE); + ADD_ATTR(, LINK_PROGRAM_FAILURE); + ADD_ATTR(, DEVICE_PARTITION_FAILED); + ADD_ATTR(, KERNEL_ARG_INFO_NOT_AVAILABLE); + ADD_ATTR(, INVALID_IMAGE_DESCRIPTOR); + ADD_ATTR(, INVALID_COMPILER_OPTIONS); + ADD_ATTR(, INVALID_LINKER_OPTIONS); + ADD_ATTR(, INVALID_DEVICE_PARTITION_COUNT); +#endif + +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + ADD_ATTR(, DEVICE_PARTITION_FAILED_EXT); + ADD_ATTR(, INVALID_PARTITION_COUNT_EXT); + ADD_ATTR(, INVALID_PARTITION_NAME_EXT); +#endif + } + + { + py::class_ cls("platform_info", py::no_init); + ADD_ATTR(PLATFORM_, PROFILE); + ADD_ATTR(PLATFORM_, VERSION); + ADD_ATTR(PLATFORM_, NAME); + ADD_ATTR(PLATFORM_, VENDOR); +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + ADD_ATTR(PLATFORM_, EXTENSIONS); +#endif + } + + { + py::class_ cls("device_type", py::no_init); + ADD_ATTR(DEVICE_TYPE_, DEFAULT); + ADD_ATTR(DEVICE_TYPE_, CPU); + ADD_ATTR(DEVICE_TYPE_, GPU); + ADD_ATTR(DEVICE_TYPE_, ACCELERATOR); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_TYPE_, CUSTOM); +#endif + ADD_ATTR(DEVICE_TYPE_, ALL); + } + + { + 
py::class_ cls("device_info", py::no_init); + ADD_ATTR(DEVICE_, TYPE); + ADD_ATTR(DEVICE_, VENDOR_ID); + ADD_ATTR(DEVICE_, MAX_COMPUTE_UNITS); + ADD_ATTR(DEVICE_, MAX_WORK_ITEM_DIMENSIONS); + ADD_ATTR(DEVICE_, MAX_WORK_GROUP_SIZE); + ADD_ATTR(DEVICE_, MAX_WORK_ITEM_SIZES); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_CHAR); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_SHORT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_INT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_LONG); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_FLOAT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_DOUBLE); + ADD_ATTR(DEVICE_, MAX_CLOCK_FREQUENCY); + ADD_ATTR(DEVICE_, ADDRESS_BITS); + ADD_ATTR(DEVICE_, MAX_READ_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_WRITE_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_MEM_ALLOC_SIZE); + ADD_ATTR(DEVICE_, IMAGE2D_MAX_WIDTH); + ADD_ATTR(DEVICE_, IMAGE2D_MAX_HEIGHT); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_WIDTH); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_HEIGHT); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_DEPTH); + ADD_ATTR(DEVICE_, IMAGE_SUPPORT); + ADD_ATTR(DEVICE_, MAX_PARAMETER_SIZE); + ADD_ATTR(DEVICE_, MAX_SAMPLERS); + ADD_ATTR(DEVICE_, MEM_BASE_ADDR_ALIGN); + ADD_ATTR(DEVICE_, MIN_DATA_TYPE_ALIGN_SIZE); + ADD_ATTR(DEVICE_, SINGLE_FP_CONFIG); +#ifdef CL_DEVICE_DOUBLE_FP_CONFIG + ADD_ATTR(DEVICE_, DOUBLE_FP_CONFIG); +#endif +#ifdef CL_DEVICE_HALF_FP_CONFIG + ADD_ATTR(DEVICE_, HALF_FP_CONFIG); +#endif + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHE_TYPE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHELINE_SIZE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHE_SIZE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_SIZE); + ADD_ATTR(DEVICE_, MAX_CONSTANT_BUFFER_SIZE); + ADD_ATTR(DEVICE_, MAX_CONSTANT_ARGS); + ADD_ATTR(DEVICE_, LOCAL_MEM_TYPE); + ADD_ATTR(DEVICE_, LOCAL_MEM_SIZE); + ADD_ATTR(DEVICE_, ERROR_CORRECTION_SUPPORT); + ADD_ATTR(DEVICE_, PROFILING_TIMER_RESOLUTION); + ADD_ATTR(DEVICE_, ENDIAN_LITTLE); + ADD_ATTR(DEVICE_, AVAILABLE); + ADD_ATTR(DEVICE_, COMPILER_AVAILABLE); + ADD_ATTR(DEVICE_, EXECUTION_CAPABILITIES); + ADD_ATTR(DEVICE_, 
QUEUE_PROPERTIES); + ADD_ATTR(DEVICE_, NAME); + ADD_ATTR(DEVICE_, VENDOR); + ADD_ATTR(, DRIVER_VERSION); + ADD_ATTR(DEVICE_, VERSION); + ADD_ATTR(DEVICE_, PROFILE); + ADD_ATTR(DEVICE_, VERSION); + ADD_ATTR(DEVICE_, EXTENSIONS); + ADD_ATTR(DEVICE_, PLATFORM); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_HALF); + ADD_ATTR(DEVICE_, HOST_UNIFIED_MEMORY); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_CHAR); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_SHORT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_INT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_LONG); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_FLOAT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_DOUBLE); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_HALF); + ADD_ATTR(DEVICE_, OPENCL_C_VERSION); +#endif +// support for cl_nv_device_attribute_query +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + ADD_ATTR(DEVICE_, COMPUTE_CAPABILITY_MAJOR_NV); + ADD_ATTR(DEVICE_, COMPUTE_CAPABILITY_MINOR_NV); + ADD_ATTR(DEVICE_, REGISTERS_PER_BLOCK_NV); + ADD_ATTR(DEVICE_, WARP_SIZE_NV); + ADD_ATTR(DEVICE_, GPU_OVERLAP_NV); + ADD_ATTR(DEVICE_, KERNEL_EXEC_TIMEOUT_NV); + ADD_ATTR(DEVICE_, INTEGRATED_MEMORY_NV); +#endif +// {{{ cl_amd_device_attribute_query +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD + ADD_ATTR(DEVICE_, PROFILING_TIMER_OFFSET_AMD); +#endif +#ifdef CL_DEVICE_TOPOLOGY_AMD + ADD_ATTR(DEVICE_, TOPOLOGY_AMD); +#endif +#ifdef CL_DEVICE_BOARD_NAME_AMD + ADD_ATTR(DEVICE_, BOARD_NAME_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD + ADD_ATTR(DEVICE_, GLOBAL_FREE_MEMORY_AMD); +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD + ADD_ATTR(DEVICE_, SIMD_PER_COMPUTE_UNIT_AMD); +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD + ADD_ATTR(DEVICE_, SIMD_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD + ADD_ATTR(DEVICE_, SIMD_INSTRUCTION_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD + ADD_ATTR(DEVICE_, WAVEFRONT_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD + ADD_ATTR(DEVICE_, 
GLOBAL_MEM_CHANNELS_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD + ADD_ATTR(DEVICE_, GLOBAL_MEM_CHANNEL_BANKS_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD + ADD_ATTR(DEVICE_, GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD + ADD_ATTR(DEVICE_, LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD + ADD_ATTR(DEVICE_, LOCAL_MEM_BANKS_AMD); +#endif +// }}} +#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT + ADD_ATTR(DEVICE_, MAX_ATOMIC_COUNTERS_EXT); +#endif +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + ADD_ATTR(DEVICE_, PARENT_DEVICE_EXT); + ADD_ATTR(DEVICE_, PARTITION_TYPES_EXT); + ADD_ATTR(DEVICE_, AFFINITY_DOMAINS_EXT); + ADD_ATTR(DEVICE_, REFERENCE_COUNT_EXT); + ADD_ATTR(DEVICE_, PARTITION_STYLE_EXT); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_, LINKER_AVAILABLE); + ADD_ATTR(DEVICE_, BUILT_IN_KERNELS); + ADD_ATTR(DEVICE_, IMAGE_MAX_BUFFER_SIZE); + ADD_ATTR(DEVICE_, IMAGE_MAX_ARRAY_SIZE); + ADD_ATTR(DEVICE_, PARENT_DEVICE); + ADD_ATTR(DEVICE_, PARTITION_MAX_SUB_DEVICES); + ADD_ATTR(DEVICE_, PARTITION_PROPERTIES); + ADD_ATTR(DEVICE_, PARTITION_AFFINITY_DOMAIN); + ADD_ATTR(DEVICE_, PARTITION_TYPE); + ADD_ATTR(DEVICE_, REFERENCE_COUNT); + ADD_ATTR(DEVICE_, PREFERRED_INTEROP_USER_SYNC); + ADD_ATTR(DEVICE_, PRINTF_BUFFER_SIZE); +#endif +#ifdef cl_khr_image2d_from_buffer + ADD_ATTR(DEVICE_, IMAGE_PITCH_ALIGNMENT); + ADD_ATTR(DEVICE_, IMAGE_BASE_ADDRESS_ALIGNMENT); +#endif + } + + { + py::class_ cls("device_fp_config", py::no_init); + ADD_ATTR(FP_, DENORM); + ADD_ATTR(FP_, INF_NAN); + ADD_ATTR(FP_, ROUND_TO_NEAREST); + ADD_ATTR(FP_, ROUND_TO_ZERO); + ADD_ATTR(FP_, ROUND_TO_INF); + ADD_ATTR(FP_, FMA); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(FP_, SOFT_FLOAT); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(FP_, CORRECTLY_ROUNDED_DIVIDE_SQRT); +#endif + } + + { + py::class_ cls("device_mem_cache_type", 
py::no_init); + ADD_ATTR( , NONE); + ADD_ATTR( , READ_ONLY_CACHE); + ADD_ATTR( , READ_WRITE_CACHE); + } + + { + py::class_ cls("device_local_mem_type", py::no_init); + ADD_ATTR( , LOCAL); + ADD_ATTR( , GLOBAL); + } + + { + py::class_ cls("device_exec_capabilities", py::no_init); + ADD_ATTR(EXEC_, KERNEL); + ADD_ATTR(EXEC_, NATIVE_KERNEL); +#ifdef CL_EXEC_IMMEDIATE_EXECUTION_INTEL + ADD_ATTR(EXEC_, IMMEDIATE_EXECUTION_INTEL); +#endif + } + + { + py::class_ cls("command_queue_properties", py::no_init); + ADD_ATTR(QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); + ADD_ATTR(QUEUE_, PROFILING_ENABLE); +#ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL + ADD_ATTR(QUEUE_, IMMEDIATE_EXECUTION_ENABLE_INTEL); +#endif + } + + { + py::class_ cls("context_info", py::no_init); + ADD_ATTR(CONTEXT_, REFERENCE_COUNT); + ADD_ATTR(CONTEXT_, DEVICES); + ADD_ATTR(CONTEXT_, PROPERTIES); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(CONTEXT_, NUM_DEVICES); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(CONTEXT_, INTEROP_USER_SYNC); +#endif + } + + { + py::class_ cls("gl_context_info", py::no_init); +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR(, CURRENT_DEVICE_FOR_GL_CONTEXT_KHR); + ADD_ATTR(, DEVICES_FOR_GL_CONTEXT_KHR); +#endif + } + + { + py::class_ cls("context_properties", py::no_init); + ADD_ATTR(CONTEXT_, PLATFORM); +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR( ,GL_CONTEXT_KHR); + ADD_ATTR( ,EGL_DISPLAY_KHR); + ADD_ATTR( ,GLX_DISPLAY_KHR); + ADD_ATTR( ,WGL_HDC_KHR); + ADD_ATTR( ,CGL_SHAREGROUP_KHR); +#endif +#if defined(__APPLE__) && defined(HAVE_GL) + ADD_ATTR( ,CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE); +#endif /* __APPLE__ */ +// cl_amd_offline_devices +#ifdef CL_CONTEXT_OFFLINE_DEVICES_AMD + ADD_ATTR(CONTEXT_, OFFLINE_DEVICES_AMD); +#endif + } + + { + py::class_ cls("command_queue_info", py::no_init); + ADD_ATTR(QUEUE_, CONTEXT); + ADD_ATTR(QUEUE_, DEVICE); + ADD_ATTR(QUEUE_, REFERENCE_COUNT); + ADD_ATTR(QUEUE_, PROPERTIES); + } 
+ + { + py::class_ cls("mem_flags", py::no_init); + ADD_ATTR(MEM_, READ_WRITE); + ADD_ATTR(MEM_, WRITE_ONLY); + ADD_ATTR(MEM_, READ_ONLY); + ADD_ATTR(MEM_, USE_HOST_PTR); + ADD_ATTR(MEM_, ALLOC_HOST_PTR); + ADD_ATTR(MEM_, COPY_HOST_PTR); +#ifdef cl_amd_device_memory_flags + ADD_ATTR(MEM_, USE_PERSISTENT_MEM_AMD); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MEM_, HOST_WRITE_ONLY); + ADD_ATTR(MEM_, HOST_READ_ONLY); + ADD_ATTR(MEM_, HOST_NO_ACCESS); +#endif + } + + { + py::class_ cls("channel_order", py::no_init); + ADD_ATTR( , R); + ADD_ATTR( , A); + ADD_ATTR( , RG); + ADD_ATTR( , RA); + ADD_ATTR( , RGB); + ADD_ATTR( , RGBA); + ADD_ATTR( , BGRA); + ADD_ATTR( , INTENSITY); + ADD_ATTR( , LUMINANCE); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR( , Rx); + ADD_ATTR( , RGx); + ADD_ATTR( , RGBx); +#endif + } + + { + py::class_ cls("channel_type", py::no_init); + ADD_ATTR( , SNORM_INT8); + ADD_ATTR( , SNORM_INT16); + ADD_ATTR( , UNORM_INT8); + ADD_ATTR( , UNORM_INT16); + ADD_ATTR( , UNORM_SHORT_565); + ADD_ATTR( , UNORM_SHORT_555); + ADD_ATTR( , UNORM_INT_101010); + ADD_ATTR( , SIGNED_INT8); + ADD_ATTR( , SIGNED_INT16); + ADD_ATTR( , SIGNED_INT32); + ADD_ATTR( , UNSIGNED_INT8); + ADD_ATTR( , UNSIGNED_INT16); + ADD_ATTR( , UNSIGNED_INT32); + ADD_ATTR( , HALF_FLOAT); + ADD_ATTR( , FLOAT); + } + + { + py::class_ cls("mem_object_type", py::no_init); + ADD_ATTR(MEM_OBJECT_, BUFFER); + ADD_ATTR(MEM_OBJECT_, IMAGE2D); + ADD_ATTR(MEM_OBJECT_, IMAGE3D); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MEM_OBJECT_, IMAGE2D_ARRAY); + ADD_ATTR(MEM_OBJECT_, IMAGE1D); + ADD_ATTR(MEM_OBJECT_, IMAGE1D_ARRAY); + ADD_ATTR(MEM_OBJECT_, IMAGE1D_BUFFER); +#endif + } + + { + py::class_ cls("mem_info", py::no_init); + ADD_ATTR(MEM_, TYPE); + ADD_ATTR(MEM_, FLAGS); + ADD_ATTR(MEM_, SIZE); + ADD_ATTR(MEM_, HOST_PTR); + ADD_ATTR(MEM_, MAP_COUNT); + ADD_ATTR(MEM_, REFERENCE_COUNT); + ADD_ATTR(MEM_, CONTEXT); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(MEM_, ASSOCIATED_MEMOBJECT); + 
ADD_ATTR(MEM_, OFFSET); +#endif + } + + { + py::class_ cls("image_info", py::no_init); + ADD_ATTR(IMAGE_, FORMAT); + ADD_ATTR(IMAGE_, ELEMENT_SIZE); + ADD_ATTR(IMAGE_, ROW_PITCH); + ADD_ATTR(IMAGE_, SLICE_PITCH); + ADD_ATTR(IMAGE_, WIDTH); + ADD_ATTR(IMAGE_, HEIGHT); + ADD_ATTR(IMAGE_, DEPTH); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(IMAGE_, ARRAY_SIZE); + ADD_ATTR(IMAGE_, BUFFER); + ADD_ATTR(IMAGE_, NUM_MIP_LEVELS); + ADD_ATTR(IMAGE_, NUM_SAMPLES); +#endif + } + + { + py::class_ cls("addressing_mode", py::no_init); + ADD_ATTR(ADDRESS_, NONE); + ADD_ATTR(ADDRESS_, CLAMP_TO_EDGE); + ADD_ATTR(ADDRESS_, CLAMP); + ADD_ATTR(ADDRESS_, REPEAT); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(ADDRESS_, MIRRORED_REPEAT); +#endif + } + + { + py::class_ cls("filter_mode", py::no_init); + ADD_ATTR(FILTER_, NEAREST); + ADD_ATTR(FILTER_, LINEAR); + } + + { + py::class_ cls("sampler_info", py::no_init); + ADD_ATTR(SAMPLER_, REFERENCE_COUNT); + ADD_ATTR(SAMPLER_, CONTEXT); + ADD_ATTR(SAMPLER_, NORMALIZED_COORDS); + ADD_ATTR(SAMPLER_, ADDRESSING_MODE); + ADD_ATTR(SAMPLER_, FILTER_MODE); + } + + { + py::class_ cls("map_flags", py::no_init); + ADD_ATTR(MAP_, READ); + ADD_ATTR(MAP_, WRITE); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MAP_, WRITE_INVALIDATE_REGION); +#endif + } + + { + py::class_ cls("program_info", py::no_init); + ADD_ATTR(PROGRAM_, REFERENCE_COUNT); + ADD_ATTR(PROGRAM_, CONTEXT); + ADD_ATTR(PROGRAM_, NUM_DEVICES); + ADD_ATTR(PROGRAM_, DEVICES); + ADD_ATTR(PROGRAM_, SOURCE); + ADD_ATTR(PROGRAM_, BINARY_SIZES); + ADD_ATTR(PROGRAM_, BINARIES); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_, NUM_KERNELS); + ADD_ATTR(PROGRAM_, KERNEL_NAMES); +#endif + } + + { + py::class_ cls("program_build_info", py::no_init); + ADD_ATTR(PROGRAM_BUILD_, STATUS); + ADD_ATTR(PROGRAM_BUILD_, OPTIONS); + ADD_ATTR(PROGRAM_BUILD_, LOG); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_, BINARY_TYPE); +#endif + } + + { + py::class_ cls("program_binary_type", py::no_init); +#if 
PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_BINARY_TYPE_, NONE); + ADD_ATTR(PROGRAM_BINARY_TYPE_, COMPILED_OBJECT); + ADD_ATTR(PROGRAM_BINARY_TYPE_, LIBRARY); + ADD_ATTR(PROGRAM_BINARY_TYPE_, EXECUTABLE); +#endif + } + + { + py::class_ cls("kernel_info", py::no_init); + ADD_ATTR(KERNEL_, FUNCTION_NAME); + ADD_ATTR(KERNEL_, NUM_ARGS); + ADD_ATTR(KERNEL_, REFERENCE_COUNT); + ADD_ATTR(KERNEL_, CONTEXT); + ADD_ATTR(KERNEL_, PROGRAM); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_, ATTRIBUTES); +#endif + } + + { + py::class_ cls("kernel_arg_info", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_, ADDRESS_QUALIFIER); + ADD_ATTR(KERNEL_ARG_, ACCESS_QUALIFIER); + ADD_ATTR(KERNEL_ARG_, TYPE_NAME); + ADD_ATTR(KERNEL_ARG_, NAME); +#endif + } + + { + py::class_ cls( + "kernel_arg_address_qualifier", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_ADDRESS_, GLOBAL); + ADD_ATTR(KERNEL_ARG_ADDRESS_, LOCAL); + ADD_ATTR(KERNEL_ARG_ADDRESS_, CONSTANT); + ADD_ATTR(KERNEL_ARG_ADDRESS_, PRIVATE); +#endif + } + + { + py::class_ cls( + "kernel_arg_access_qualifier", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_ACCESS_, READ_ONLY); + ADD_ATTR(KERNEL_ARG_ACCESS_, WRITE_ONLY); + ADD_ATTR(KERNEL_ARG_ACCESS_, READ_WRITE); + ADD_ATTR(KERNEL_ARG_ACCESS_, NONE); +#endif + } + + { + py::class_ cls("kernel_work_group_info", py::no_init); + ADD_ATTR(KERNEL_, WORK_GROUP_SIZE); + ADD_ATTR(KERNEL_, COMPILE_WORK_GROUP_SIZE); + ADD_ATTR(KERNEL_, LOCAL_MEM_SIZE); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(KERNEL_, PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + ADD_ATTR(KERNEL_, PRIVATE_MEM_SIZE); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_, GLOBAL_WORK_SIZE); +#endif + } + + { + py::class_ cls("event_info", py::no_init); + ADD_ATTR(EVENT_, COMMAND_QUEUE); + ADD_ATTR(EVENT_, COMMAND_TYPE); + ADD_ATTR(EVENT_, REFERENCE_COUNT); + ADD_ATTR(EVENT_, COMMAND_EXECUTION_STATUS); +#if PYOPENCL_CL_VERSION >= 0x1010 + 
ADD_ATTR(EVENT_, CONTEXT); +#endif + } + + { + py::class_ cls("command_type", py::no_init); + ADD_ATTR(COMMAND_, NDRANGE_KERNEL); + ADD_ATTR(COMMAND_, TASK); + ADD_ATTR(COMMAND_, NATIVE_KERNEL); + ADD_ATTR(COMMAND_, READ_BUFFER); + ADD_ATTR(COMMAND_, WRITE_BUFFER); + ADD_ATTR(COMMAND_, COPY_BUFFER); + ADD_ATTR(COMMAND_, READ_IMAGE); + ADD_ATTR(COMMAND_, WRITE_IMAGE); + ADD_ATTR(COMMAND_, COPY_IMAGE); + ADD_ATTR(COMMAND_, COPY_IMAGE_TO_BUFFER); + ADD_ATTR(COMMAND_, COPY_BUFFER_TO_IMAGE); + ADD_ATTR(COMMAND_, MAP_BUFFER); + ADD_ATTR(COMMAND_, MAP_IMAGE); + ADD_ATTR(COMMAND_, UNMAP_MEM_OBJECT); + ADD_ATTR(COMMAND_, MARKER); + ADD_ATTR(COMMAND_, ACQUIRE_GL_OBJECTS); + ADD_ATTR(COMMAND_, RELEASE_GL_OBJECTS); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(COMMAND_, READ_BUFFER_RECT); + ADD_ATTR(COMMAND_, WRITE_BUFFER_RECT); + ADD_ATTR(COMMAND_, COPY_BUFFER_RECT); + ADD_ATTR(COMMAND_, USER); +#endif +#ifdef cl_ext_migrate_memobject + ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECT_EXT); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(COMMAND_, BARRIER); + ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECTS); + ADD_ATTR(COMMAND_, FILL_BUFFER); + ADD_ATTR(COMMAND_, FILL_IMAGE); +#endif + } + + { + py::class_ cls("command_execution_status", py::no_init); + ADD_ATTR(, COMPLETE); + ADD_ATTR(, RUNNING); + ADD_ATTR(, SUBMITTED); + ADD_ATTR(, QUEUED); + } + + { + py::class_ cls("profiling_info", py::no_init); + ADD_ATTR(PROFILING_COMMAND_, QUEUED); + ADD_ATTR(PROFILING_COMMAND_, SUBMIT); + ADD_ATTR(PROFILING_COMMAND_, START); + ADD_ATTR(PROFILING_COMMAND_, END); + } + +/* not needed--filled in automatically by implementation. 
+#if PYOPENCL_CL_VERSION >= 0x1010 + { + py::class_ cls("buffer_create_type", py::no_init); + ADD_ATTR(BUFFER_CREATE_TYPE_, REGION); + } +#endif +*/ + + { + py::class_ cls( + "mem_migration_flags", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MIGRATE_MEM_OBJECT_, HOST); + ADD_ATTR(MIGRATE_MEM_OBJECT_, CONTENT_UNDEFINED); +#endif + } + + { + py::class_ cls( + "device_partition_property_ext", py::no_init); +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + ADD_ATTR_SUFFIX(DEVICE_PARTITION_, EQUALLY, _EXT); + ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_COUNTS, _EXT); + ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_NAMES, _EXT); + ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_AFFINITY_DOMAIN, _EXT); + ADD_ATTR_SUFFIX(, PROPERTIES_LIST_END, _EXT); + ADD_ATTR_SUFFIX(, PARTITION_BY_COUNTS_LIST_END, _EXT); + ADD_ATTR_SUFFIX(, PARTITION_BY_NAMES_LIST_END, _EXT); +#endif + } + + { + py::class_ cls("affinity_domain_ext", py::no_init); +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L1_CACHE, _EXT); + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L2_CACHE, _EXT); + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L3_CACHE, _EXT); + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L4_CACHE, _EXT); + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, NUMA, _EXT); + ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, NEXT_FISSIONABLE, _EXT); +#endif + } + + { + py::class_ cls( + "device_partition_property", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_PARTITION_, EQUALLY); + ADD_ATTR(DEVICE_PARTITION_, BY_COUNTS); + ADD_ATTR(DEVICE_PARTITION_, BY_COUNTS_LIST_END); + ADD_ATTR(DEVICE_PARTITION_, BY_AFFINITY_DOMAIN); +#endif + } + + { + py::class_ cls("device_affinity_domain", py::no_init); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, NUMA); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L4_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L3_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L2_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L1_CACHE); + 
ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, NEXT_PARTITIONABLE); +#endif + } + +#ifdef HAVE_GL + { + py::class_ cls("gl_object_type", py::no_init); + ADD_ATTR(GL_OBJECT_, BUFFER); + ADD_ATTR(GL_OBJECT_, TEXTURE2D); + ADD_ATTR(GL_OBJECT_, TEXTURE3D); + ADD_ATTR(GL_OBJECT_, RENDERBUFFER); + } + + { + py::class_ cls("gl_texture_info", py::no_init); + ADD_ATTR(GL_, TEXTURE_TARGET); + ADD_ATTR(GL_, MIPMAP_LEVEL); + } +#endif + + { + py::class_ cls("migrate_mem_object_flags_ext", py::no_init); +#ifdef cl_ext_migrate_memobject + ADD_ATTR_SUFFIX(MIGRATE_MEM_OBJECT_, HOST, _EXT); +#endif + } + + // }}} +} + + + + +// vim: foldmethod=marker diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp new file mode 100644 index 00000000..dac179c7 --- /dev/null +++ b/src/wrap_helpers.hpp @@ -0,0 +1,175 @@ +#ifndef PYCUDA_WRAP_HELPERS_HEADER_SEEN +#define PYCUDA_WRAP_HELPERS_HEADER_SEEN + + + + +#include +#include +#include + + + + +namespace py = boost::python; + + + + +#if (BOOST_VERSION/100) < 1035 +#warning ******************************************************************* +#warning **** Your version of Boost C++ is likely too old for PyOpenCL. 
**** +#warning ******************************************************************* +#endif + + + + +#define PYTHON_ERROR(TYPE, REASON) \ +{ \ + PyErr_SetString(PyExc_##TYPE, REASON); \ + throw boost::python::error_already_set(); \ +} + +#define ENUM_VALUE(NAME) \ + value(#NAME, NAME) + +#define DEF_SIMPLE_METHOD(NAME) \ + def(#NAME, &cls::NAME) + +#define DEF_SIMPLE_METHOD_WITH_ARGS(NAME, ARGS) \ + def(#NAME, &cls::NAME, boost::python::args ARGS) + +#define DEF_SIMPLE_FUNCTION(NAME) \ + boost::python::def(#NAME, &NAME) + +#define DEF_SIMPLE_FUNCTION_WITH_ARGS(NAME, ARGS) \ + boost::python::def(#NAME, &NAME, boost::python::args ARGS) + +#define DEF_SIMPLE_RO_MEMBER(NAME) \ + def_readonly(#NAME, &cls::m_##NAME) + +#define DEF_SIMPLE_RW_MEMBER(NAME) \ + def_readwrite(#NAME, &cls::m_##NAME) + +#define PYTHON_FOREACH(NAME, ITERABLE) \ + BOOST_FOREACH(boost::python::object NAME, \ + std::make_pair( \ + boost::python::stl_input_iterator(ITERABLE), \ + boost::python::stl_input_iterator())) + +#define COPY_PY_LIST(TYPE, NAME) \ + std::copy( \ + boost::python::stl_input_iterator(py_##NAME), \ + boost::python::stl_input_iterator(), \ + std::back_inserter(NAME)); + +#define COPY_PY_COORD_TRIPLE(NAME) \ + size_t NAME[3] = {0, 0, 0}; \ + { \ + size_t my_len = len(py_##NAME); \ + if (my_len > 3) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py::extract(py_##NAME[i])(); \ + } + +#define COPY_PY_PITCH_TUPLE(NAME) \ + size_t NAME[2] = {0, 0}; \ + if (py_##NAME.ptr() != Py_None) \ + { \ + size_t my_len = len(py_##NAME); \ + if (my_len > 2) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py::extract(py_##NAME[i])(); \ + } + +#define COPY_PY_REGION_TRIPLE(NAME) \ + size_t NAME[3] = {1, 1, 1}; \ + { \ + size_t my_len = len(py_##NAME); \ + if (my_len > 3) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too 
many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py::extract(py_##NAME[i])(); \ + } + +#define PYOPENCL_PARSE_NUMPY_ARRAY_SPEC \ + PyArray_Descr *tp_descr; \ + if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) \ + throw py::error_already_set(); \ + \ + py::extract shape_as_int(py_shape); \ + std::vector shape; \ + \ + if (shape_as_int.check()) \ + shape.push_back(shape_as_int()); \ + else \ + COPY_PY_LIST(npy_intp, shape); \ + \ + NPY_ORDER order = PyArray_CORDER; \ + PyArray_OrderConverter(py_order.ptr(), &order); \ + \ + int ary_flags = 0; \ + if (order == PyArray_FORTRANORDER) \ + ary_flags |= NPY_FARRAY; \ + else if (order == PyArray_CORDER) \ + ary_flags |= NPY_CARRAY; \ + else \ + throw std::runtime_error("unrecognized order specifier"); \ + \ + std::vector strides; \ + if (py_strides.ptr() != Py_None) \ + { \ + COPY_PY_LIST(npy_intp, strides); \ + } + +#define PYOPENCL_RETURN_VECTOR(ITEMTYPE, NAME) \ + { \ + py::list pyopencl_result; \ + BOOST_FOREACH(ITEMTYPE item, NAME) \ + pyopencl_result.append(item); \ + return pyopencl_result; \ + } + +namespace +{ + template + inline boost::python::handle<> handle_from_new_ptr(T *ptr) + { + return boost::python::handle<>( + typename boost::python::manage_new_object::apply::type()(ptr)); + } + + template + inline T *from_int_ptr(intptr_t obj_ref) + { + ClType clobj = (ClType) obj_ref; + return new T(clobj, /* retain */ true); + } + + template + inline intptr_t to_int_ptr(T const &obj) + { + return (intptr_t) obj.data(); + } +} + +#define PYOPENCL_EXPOSE_TO_FROM_INT_PTR(CL_TYPENAME) \ + .def("from_int_ptr", from_int_ptr, \ + py::return_value_policy(), \ + py::arg("int_ptr_value"), \ + "(static method) Return a new Python object referencing the C-level " \ + ":c:type:`" #CL_TYPENAME "` object at the location pointed to " \ + "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ + "will be called." \ + "\n\n.. 
versionadded:: 2013.2\n") \ + .staticmethod("from_int_ptr") \ + .add_property("int_ptr", to_int_ptr, \ + "Return an integer corresponding to the pointer value " \ + "of the underlying :c:type:`" #CL_TYPENAME "`. " \ + "Use :meth:`from_int_ptr` to turn back into a Python object." \ + "\n\n.. versionadded:: 2013.2\n") \ + +#endif diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp new file mode 100644 index 00000000..73df3bd1 --- /dev/null +++ b/src/wrap_mempool.cpp @@ -0,0 +1,290 @@ +// Gregor Thalhammer (on Apr 13, 2011) said it's necessary to import Python.h +// first to prevent OS X from overriding a bunch of macros. (e.g. isspace) +#include + +#include +#include "wrap_helpers.hpp" +#include "wrap_cl.hpp" +#include "mempool.hpp" +#include "tools.hpp" +#include + + + + +namespace py = boost::python; + + + + +namespace +{ + class cl_allocator_base + { + protected: + boost::shared_ptr m_context; + cl_mem_flags m_flags; + + public: + cl_allocator_base(boost::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + throw pyopencl::error("Allocator", CL_INVALID_VALUE, + "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + } + + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } + + virtual ~cl_allocator_base() + { } + + typedef cl_mem pointer_type; + typedef size_t size_type; + + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual pointer_type allocate(size_type s) = 0; + + void free(pointer_type p) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } + }; + + class cl_deferred_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + + public: + cl_deferred_allocator(boost::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(ctx, flags) 
+ { } + + cl_allocator_base *copy() const + { + return new cl_deferred_allocator(*this); + } + + bool is_deferred() const + { return true; } + + pointer_type allocate(size_type s) + { + return pyopencl::create_buffer(m_context->data(), m_flags, s, 0); + } + }; + + const unsigned zero = 0; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + pyopencl::command_queue m_queue; + + public: + cl_immediate_allocator(pyopencl::command_queue &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(boost::shared_ptr(queue.get_context()), flags), + m_queue(queue.data(), /*retain*/ true) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : super(src), m_queue(src.m_queue) + { } + + cl_allocator_base *copy() const + { + return new cl_immediate_allocator(*this); + } + + bool is_deferred() const + { return false; } + + pointer_type allocate(size_type s) + { + pointer_type ptr = pyopencl::create_buffer( + m_context->data(), m_flags, s, 0); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. + unsigned zero = 0; + PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, ( + m_queue.data(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + )); + + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. 
+ + return ptr; + } + }; + + + + + inline + pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) + { + cl_mem mem; + int try_count = 0; + while (try_count < 2) + { + try + { + mem = alloc.allocate(size); + break; + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + + alloc.try_release_blocks(); + } + + try + { + return new pyopencl::buffer(mem, false); + } + catch (...) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + + + + class pooled_buffer + : public pyopencl::pooled_allocation >, + public pyopencl::memory_object_holder + { + private: + typedef + pyopencl::pooled_allocation > + super; + + public: + pooled_buffer( + boost::shared_ptr p, super::size_type s) + : super(p, s) + { } + + const super::pointer_type data() const + { return ptr(); } + }; + + + + + pooled_buffer *device_pool_allocate( + boost::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_buffer(pool, sz); + } + + + + + template + void expose_memory_pool(Wrapper &wrapper) + { + typedef typename Wrapper::wrapped_type cls; + wrapper + .add_property("held_blocks", &cls::held_blocks) + .add_property("active_blocks", &cls::active_blocks) + .DEF_SIMPLE_METHOD(bin_number) + .DEF_SIMPLE_METHOD(alloc_size) + .DEF_SIMPLE_METHOD(free_held) + .DEF_SIMPLE_METHOD(stop_holding) + .staticmethod("bin_number") + .staticmethod("alloc_size") + ; + } +} + + + + +void pyopencl_expose_mempool() +{ + py::def("bitlog2", pyopencl::bitlog2); + + { + typedef cl_allocator_base cls; + py::class_ wrapper("_tools_AllocatorBase", py::no_init); + wrapper + .def("__call__", allocator_call, + py::return_value_policy()) + ; + + } + + { + typedef cl_deferred_allocator cls; + py::class_ > wrapper("_tools_DeferredAllocator", + py::init< + boost::shared_ptr const &, + py::optional >()); + } + + { + typedef cl_immediate_allocator cls; + py::class_ > wrapper("_tools_ImmediateAllocator", + py::init >()); + } 
+ + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, boost::noncopyable, + boost::shared_ptr > wrapper("MemoryPool", + py::init() + ); + wrapper + .def("allocate", device_pool_allocate, + py::return_value_policy()) + .def("__call__", device_pool_allocate, + py::return_value_policy()) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_buffer cls; + py::class_ >( + "PooledBuffer", py::no_init) + .def("release", &cls::free) + ; + } +} -- GitLab From 406c4c02d5871764f1e7ac34c18b11a7f2de994b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 6 Aug 2018 12:12:30 -0500 Subject: [PATCH 03/92] Update setup.py for pybind11 --- setup.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/setup.py b/setup.py index 1c9ca77d..add6dded 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ def get_config_schema(): def main(): from setuptools import find_packages from aksetup_helper import (hack_distutils, get_config, setup, - check_git_submodules) + check_git_submodules, NumpyExtension) check_git_submodules() hack_distutils() @@ -133,6 +133,8 @@ def main(): conf["EXTRA_DEFINES"] = extra_defines + INCLUDE_DIRS = conf["CL_INC_DIR"] + ["pybind11/include"] # noqa: N806 + ver_dic = {} version_file = open("pyopencl/version.py") try: @@ -181,22 +183,6 @@ def main(): print("https://pypi.python.org/pypi/pyopencl") sys.exit(1) - # {{{ write cffi build script - - with open("cffi_build.py.in", "rt") as f: - build_script_template = f.read() - - format_args = {} - for k, v in conf.items(): - format_args[k] = repr(v) - - build_script = build_script_template.format(**format_args) - - with open("cffi_build.py", "wt") as f: - f.write(build_script) - - # }}} - setup(name="pyopencl", # metadata version=ver_dic["VERSION_TEXT"], @@ -230,9 +216,27 @@ def main(): # build info packages=find_packages(), + ext_modules=[ + NumpyExtension("_cl", + [ + 
"src/wrapper/wrap_cl.cpp", + "src/wrapper/wrap_cl_part_1.cpp", + "src/wrapper/wrap_cl_part_2.cpp", + "src/wrapper/wrap_constants.cpp", + "src/wrapper/wrap_mempool.cpp", + "src/wrapper/bitlog.cpp", + ], + include_dirs=INCLUDE_DIRS, + library_dirs=conf["CL_LIB_DIR"], + libraries=conf["CL_LIBNAME"], + define_macros=list(conf["EXTRA_DEFINES"].items()), + extra_compile_args=conf["CXXFLAGS"], + extra_link_args=conf["LDFLAGS"], + ), + ], + setup_requires=[ "numpy", - "cffi>=1.1.0", ], install_requires=[ @@ -240,14 +244,11 @@ def main(): "pytools>=2017.6", "pytest>=2", "decorator>=3.2.0", - "cffi>=1.1.0", "appdirs>=1.4.0", "six>=1.9.0", # "Mako>=0.3.6", ], - cffi_modules=["cffi_build.py:ffi"], - include_package_data=True, package_data={ "pyopencl": [ -- GitLab From a9e8cc89f40c421490c58876e3a488dd091e1fd5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 6 Aug 2018 13:46:05 -0500 Subject: [PATCH 04/92] Initial steps in pybind11 conversion [ci skip] --- setup.py | 14 ++++----- src/bitlog.hpp | 17 +++------- src/mempool.hpp | 39 ++++++++++------------- src/numpy_init.hpp | 4 --- src/tools.hpp | 31 ++++++++++++++++-- src/wrap_cl.cpp | 2 +- src/wrap_cl.hpp | 71 ++++++++++++++++++++---------------------- src/wrap_cl_part_1.cpp | 2 +- src/wrap_helpers.hpp | 26 ++-------------- src/wrap_mempool.cpp | 25 ++++++--------- 10 files changed, 105 insertions(+), 126 deletions(-) diff --git a/setup.py b/setup.py index add6dded..beead668 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def get_config_schema(): IncludeDir, LibraryDir, Libraries, \ Switch, StringListOption - default_cxxflags = ['-std=gnu++11'] + default_cxxflags = [] if 'darwin' in sys.platform: import platform @@ -219,12 +219,12 @@ def main(): ext_modules=[ NumpyExtension("_cl", [ - "src/wrapper/wrap_cl.cpp", - "src/wrapper/wrap_cl_part_1.cpp", - "src/wrapper/wrap_cl_part_2.cpp", - "src/wrapper/wrap_constants.cpp", - "src/wrapper/wrap_mempool.cpp", - "src/wrapper/bitlog.cpp", + #"src/wrap_cl.cpp", + 
#"src/wrap_cl_part_1.cpp", + #"src/wrap_cl_part_2.cpp", + #"src/wrap_constants.cpp", + "src/wrap_mempool.cpp", + #"src/bitlog.cpp", ], include_dirs=INCLUDE_DIRS, library_dirs=conf["CL_LIB_DIR"], diff --git a/src/bitlog.hpp b/src/bitlog.hpp index 405599e7..e3ffbe01 100644 --- a/src/bitlog.hpp +++ b/src/bitlog.hpp @@ -1,25 +1,18 @@ // Base-2 logarithm bithack. - - - #ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP #define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP - - #include -#include - - +#include namespace pyopencl { extern const char log_table_8[]; - inline unsigned bitlog2_16(boost::uint16_t v) + inline unsigned bitlog2_16(uint16_t v) { if (unsigned long t = v >> 8) return 8+log_table_8[t]; @@ -27,9 +20,9 @@ namespace pyopencl return log_table_8[v]; } - inline unsigned bitlog2_32(boost::uint32_t v) + inline unsigned bitlog2_32(uint32_t v) { - if (boost::uint16_t t = v >> 16) + if (uint16_t t = v >> 16) return 16+bitlog2_16(t); else return bitlog2_16(v); @@ -38,7 +31,7 @@ namespace pyopencl inline unsigned bitlog2(unsigned long v) { #if (ULONG_MAX != 4294967295) - if (boost::uint32_t t = v >> 32) + if (uint32_t t = v >> 32) return 32+bitlog2_32(t); else #endif diff --git a/src/mempool.hpp b/src/mempool.hpp index be88f13f..2895e1d0 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -1,22 +1,19 @@ // Abstract memory pool implementation - - - #ifndef _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP #define _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP - - -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include "wrap_cl.hpp" #include "bitlog.hpp" - - namespace PYGPU_PACKAGE { template @@ -51,14 +48,14 @@ namespace PYGPU_PACKAGE typedef typename Allocator::size_type size_type; private: - typedef boost::uint32_t bin_nr_t; + typedef uint32_t bin_nr_t; typedef std::vector bin_t; - typedef boost::ptr_map container_t; + typedef std::map container_t; container_t m_container; typedef typename container_t::value_type bin_pair_t; - 
std::auto_ptr m_allocator; + std::unique_ptr m_allocator; // A held block is one that's been released by the application, but that // we are keeping around to dish out again. @@ -242,7 +239,7 @@ namespace PYGPU_PACKAGE void free_held() { - BOOST_FOREACH(bin_pair_t bin_pair, m_container) + for (bin_pair_t bin_pair: m_container) { bin_t &bin = *bin_pair.second; @@ -272,9 +269,8 @@ namespace PYGPU_PACKAGE bool try_to_free_memory() { - BOOST_FOREACH(bin_pair_t bin_pair, - // free largest stuff first - std::make_pair(m_container.rbegin(), m_container.rend())) + // free largest stuff first + for (bin_pair_t bin_pair: reverse(m_container)) { bin_t &bin = *bin_pair.second; @@ -314,11 +310,8 @@ namespace PYGPU_PACKAGE }; - - - template - class pooled_allocation : public boost::noncopyable + class pooled_allocation : public noncopyable { public: typedef Pool pool_type; @@ -326,14 +319,14 @@ namespace PYGPU_PACKAGE typedef typename Pool::size_type size_type; private: - boost::shared_ptr m_pool; + std::shared_ptr m_pool; pointer_type m_ptr; size_type m_size; bool m_valid; public: - pooled_allocation(boost::shared_ptr p, size_type size) + pooled_allocation(std::shared_ptr p, size_type size) : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) { } diff --git a/src/numpy_init.hpp b/src/numpy_init.hpp index 9d34ac57..146cae65 100644 --- a/src/numpy_init.hpp +++ b/src/numpy_init.hpp @@ -2,14 +2,10 @@ #define _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP - - #include #include - - namespace { static struct pyublas_array_importer diff --git a/src/tools.hpp b/src/tools.hpp index 7254ace1..dcd1b50e 100644 --- a/src/tools.hpp +++ b/src/tools.hpp @@ -2,9 +2,8 @@ #define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP +#include - -#include #include #include "numpy_init.hpp" @@ -27,13 +26,39 @@ namespace pyopencl inline void run_python_gc() { - namespace py = boost::python; + namespace py = pybind11; py::object gc_mod( py::handle<>( PyImport_ImportModule("gc"))); 
gc_mod.attr("collect")(); } + + + // https://stackoverflow.com/a/28139075 + template + struct reversion_wrapper { T& iterable; }; + + template + auto begin (reversion_wrapper w) { return std::rbegin(w.iterable); } + + template + auto end (reversion_wrapper w) { return std::rend(w.iterable); } + + template + reversion_wrapper reverse (T&& iterable) { return { iterable }; } + + + // https://stackoverflow.com/a/44175911 + class noncopyable { + public: + noncopyable() = default; + ~noncopyable() = default; + + private: + noncopyable(const noncopyable&) = delete; + noncopyable& operator=(const noncopyable&) = delete; + }; } diff --git a/src/wrap_cl.cpp b/src/wrap_cl.cpp index 9f680f2d..0b231ad0 100644 --- a/src/wrap_cl.cpp +++ b/src/wrap_cl.cpp @@ -13,7 +13,7 @@ extern void pyopencl_expose_part_1(); extern void pyopencl_expose_part_2(); extern void pyopencl_expose_mempool(); -BOOST_PYTHON_MODULE(_cl) +PYBIND11_MODULE(_cl) { pyopencl_expose_constants(); pyopencl_expose_part_1(); diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 6ee2e33b..e5add471 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -49,9 +49,6 @@ #include #include #include -#include -#include -#include #include "wrap_helpers.hpp" #include "numpy_init.hpp" #include "tools.hpp" @@ -509,7 +506,7 @@ namespace pyopencl (num_platforms, platforms.empty( ) ? 
NULL : &platforms.front(), &num_platforms)); py::list result; - BOOST_FOREACH(cl_platform_id pid, platforms) + for (cl_platform_id pid: platforms) result.append(handle_from_new_ptr( new platform(pid))); @@ -834,7 +831,7 @@ namespace pyopencl (m_device, props_ptr, num_entries, &result.front(), NULL)); py::list py_result; - BOOST_FOREACH(cl_device_id did, result) + for (cl_device_id did: result) py_result.append(handle_from_new_ptr( new pyopencl::device(did, /*retain*/true, device::REF_CL_1_2))); @@ -872,7 +869,7 @@ namespace pyopencl (m_device, props_ptr, num_entries, &result.front(), NULL)); py::list py_result; - BOOST_FOREACH(cl_device_id did, result) + for (cl_device_id did: result) py_result.append(handle_from_new_ptr( new pyopencl::device(did, /*retain*/true, device::REF_FISSION_EXT))); @@ -907,7 +904,7 @@ namespace pyopencl num_devices, devices.empty( ) ? NULL : &devices.front(), &num_devices)); py::list result; - BOOST_FOREACH(cl_device_id did, devices) + for (cl_device_id did: devices) result.append(handle_from_new_ptr( new device(did))); @@ -958,7 +955,7 @@ namespace pyopencl PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); py::list py_result; - BOOST_FOREACH(cl_device_id did, result) + for (cl_device_id did: result) py_result.append(handle_from_new_ptr( new pyopencl::device(did))); return py_result; @@ -1245,12 +1242,12 @@ namespace pyopencl } } - std::auto_ptr get_context() const + std::unique_ptr get_context() const { cl_context param_value; PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), ¶m_value, 0)); - return std::auto_ptr( + return std::unique_ptr( new context(param_value, /*retain*/ true)); } @@ -1358,11 +1355,11 @@ namespace pyopencl // to a Python object and waits for its own completion upon destruction. 
protected: - std::auto_ptr m_ward; + std::unique_ptr m_ward; public: - nanny_event(cl_event evt, bool retain, std::auto_ptr &ward) + nanny_event(cl_event evt, bool retain, std::unique_ptr &ward) : event(evt, retain), m_ward(ward) { } @@ -1575,7 +1572,7 @@ namespace pyopencl { public: #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - typedef std::auto_ptr hostbuf_t; + typedef std::unique_ptr hostbuf_t; #else typedef py::object hostbuf_t; #endif @@ -1857,10 +1854,10 @@ namespace pyopencl void *buf = 0; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr retained_buf_obj; + std::unique_ptr retained_buf_obj; if (py_hostbuf.ptr() != Py_None) { - retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + retained_buf_obj = std::unique_ptr(new py_buffer_wrapper); int py_buf_flags = PyBUF_ANY_CONTIGUOUS; if ((flags & CL_MEM_USE_HOST_PTR) @@ -1947,7 +1944,7 @@ namespace pyopencl PYOPENCL_BUFFER_SIZE_T len; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); @@ -1990,7 +1987,7 @@ namespace pyopencl PYOPENCL_BUFFER_SIZE_T len; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); @@ -2084,7 +2081,7 @@ namespace pyopencl void *buf; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); @@ -2140,7 +2137,7 @@ namespace pyopencl const void *buf; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); @@ -2229,7 +2226,7 @@ namespace pyopencl PYOPENCL_BUFFER_SIZE_T pattern_len; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new 
py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS); @@ -2316,7 +2313,7 @@ namespace pyopencl inline cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp) { - std::auto_ptr result(new cl_image_format); + std::unique_ptr result(new cl_image_format); result->image_channel_order = ord; result->image_channel_data_type = tp; return result.release(); @@ -2417,10 +2414,10 @@ namespace pyopencl PYOPENCL_BUFFER_SIZE_T len; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr retained_buf_obj; + std::unique_ptr retained_buf_obj; if (buffer.ptr() != Py_None) { - retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + retained_buf_obj = std::unique_ptr(new py_buffer_wrapper); int py_buf_flags = PyBUF_ANY_CONTIGUOUS; if ((flags & CL_MEM_USE_HOST_PTR) @@ -2563,10 +2560,10 @@ namespace pyopencl void *buf = 0; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr retained_buf_obj; + std::unique_ptr retained_buf_obj; if (buffer.ptr() != Py_None) { - retained_buf_obj = std::auto_ptr(new py_buffer_wrapper); + retained_buf_obj = std::unique_ptr(new py_buffer_wrapper); int py_buf_flags = PyBUF_ANY_CONTIGUOUS; if ((flags & CL_MEM_USE_HOST_PTR) @@ -2647,7 +2644,7 @@ namespace pyopencl void *buf; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); @@ -2693,7 +2690,7 @@ namespace pyopencl const void *buf; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); @@ -2826,7 +2823,7 @@ namespace pyopencl const void *color_buf; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - std::auto_ptr ward(new py_buffer_wrapper); + std::unique_ptr ward(new py_buffer_wrapper); ward->get(color.ptr(), PyBUF_ANY_CONTIGUOUS); @@ -2911,7 +2908,7 @@ namespace 
pyopencl PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; npy_uintp size_in_bytes = tp_descr->elsize; - BOOST_FOREACH(npy_intp sdim, shape) + for (npy_intp sdim: shape) size_in_bytes *= sdim; py::handle<> result; @@ -2937,7 +2934,7 @@ namespace pyopencl event evt_handle(evt, false); - std::auto_ptr map; + std::unique_ptr map; try { result = py::handle<>(PyArray_NewFromDescr( @@ -2951,7 +2948,7 @@ namespace pyopencl throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, "miscalculated numpy array size (not contiguous?)"); - map = std::auto_ptr(new memory_map(cq, buf, mapped)); + map = std::unique_ptr(new memory_map(cq, buf, mapped)); } catch (...) { @@ -3011,10 +3008,10 @@ namespace pyopencl event evt_handle(evt, false); - std::auto_ptr map; + std::unique_ptr map; try { - map = std::auto_ptr(new memory_map(cq, img, mapped)); + map = std::unique_ptr(new memory_map(cq, img, mapped)); } catch (...) { @@ -3165,7 +3162,7 @@ namespace pyopencl PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); py::list py_result; - BOOST_FOREACH(cl_device_id did, result) + for (cl_device_id did: result) py_result.append(handle_from_new_ptr( new pyopencl::device(did))); return py_result; @@ -3294,7 +3291,7 @@ namespace pyopencl } std::vector header_name_ptrs; - BOOST_FOREACH(std::string const &name, header_names) + for (std::string const &name: header_names) header_name_ptrs.push_back(name.c_str()); // }}} @@ -3757,7 +3754,7 @@ namespace pyopencl kernels.empty( ) ? NULL : &kernels.front(), &num_kernels)); py::list result; - BOOST_FOREACH(cl_kernel knl, kernels) + for (cl_kernel knl: kernels) result.append(handle_from_new_ptr(new kernel(knl, true))); return result; @@ -4122,7 +4119,7 @@ namespace pyopencl devices.empty( ) ? 
NULL : &devices.front(), &size)); py::list result; - BOOST_FOREACH(cl_device_id did, devices) + for (cl_device_id did: devices) result.append(handle_from_new_ptr( new device(did))); diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index f3448aca..cff0734f 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -58,7 +58,7 @@ void pyopencl_expose_part_1() { typedef context cls; py::class_ >("Context", py::no_init) + std::shared_ptr >("Context", py::no_init) .def("__init__", make_constructor(create_context, py::default_call_policies(), (py::arg("devices")=py::object(), diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index dac179c7..866e8002 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -2,27 +2,10 @@ #define PYCUDA_WRAP_HELPERS_HEADER_SEEN +#include -#include -#include -#include - - - - -namespace py = boost::python; - - - - -#if (BOOST_VERSION/100) < 1035 -#warning ******************************************************************* -#warning **** Your version of Boost C++ is likely too old for PyOpenCL. **** -#warning ******************************************************************* -#endif - - +namespace py = pybind11; #define PYTHON_ERROR(TYPE, REASON) \ @@ -53,10 +36,7 @@ namespace py = boost::python; def_readwrite(#NAME, &cls::m_##NAME) #define PYTHON_FOREACH(NAME, ITERABLE) \ - BOOST_FOREACH(boost::python::object NAME, \ - std::make_pair( \ - boost::python::stl_input_iterator(ITERABLE), \ - boost::python::stl_input_iterator())) + for (py::object NAME: ITERABLE) #define COPY_PY_LIST(TYPE, NAME) \ std::copy( \ diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 73df3bd1..c785d168 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -2,17 +2,12 @@ // first to prevent OS X from overriding a bunch of macros. (e.g. 
isspace) #include +#include #include #include "wrap_helpers.hpp" -#include "wrap_cl.hpp" +// #include "wrap_cl.hpp" #include "mempool.hpp" #include "tools.hpp" -#include - - - - -namespace py = boost::python; @@ -22,11 +17,11 @@ namespace class cl_allocator_base { protected: - boost::shared_ptr m_context; + std::shared_ptr m_context; cl_mem_flags m_flags; public: - cl_allocator_base(boost::shared_ptr const &ctx, + cl_allocator_base(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : m_context(ctx), m_flags(flags) { @@ -66,7 +61,7 @@ namespace typedef cl_allocator_base super; public: - cl_deferred_allocator(boost::shared_ptr const &ctx, + cl_deferred_allocator(std::shared_ptr const &ctx, cl_mem_flags flags=CL_MEM_READ_WRITE) : super(ctx, flags) { } @@ -96,7 +91,7 @@ namespace public: cl_immediate_allocator(pyopencl::command_queue &queue, cl_mem_flags flags=CL_MEM_READ_WRITE) - : super(boost::shared_ptr(queue.get_context()), flags), + : super(std::shared_ptr(queue.get_context()), flags), m_queue(queue.data(), /*retain*/ true) { } @@ -190,7 +185,7 @@ namespace public: pooled_buffer( - boost::shared_ptr p, super::size_type s) + std::shared_ptr p, super::size_type s) : super(p, s) { } @@ -202,7 +197,7 @@ namespace pooled_buffer *device_pool_allocate( - boost::shared_ptr > pool, + std::shared_ptr > pool, pyopencl::memory_pool::size_type sz) { return new pooled_buffer(pool, sz); @@ -249,7 +244,7 @@ void pyopencl_expose_mempool() typedef cl_deferred_allocator cls; py::class_ > wrapper("_tools_DeferredAllocator", py::init< - boost::shared_ptr const &, + std::shared_ptr const &, py::optional >()); } @@ -264,7 +259,7 @@ void pyopencl_expose_mempool() py::class_< cls, boost::noncopyable, - boost::shared_ptr > wrapper("MemoryPool", + std::shared_ptr > wrapper("MemoryPool", py::init() ); wrapper -- GitLab From 35034647fbb994489c9816d98660c68ab3e8490e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 7 Aug 2018 11:49:45 -0500 Subject: [PATCH 05/92] 
Headway towards de-boost and pybind11 --- setup.py | 10 +- src/mempool.hpp | 6 +- src/numpy_init.hpp | 1 + src/tools.hpp | 5 +- src/wrap_cl.cpp | 18 +-- src/wrap_cl.hpp | 247 +++++++++++++++++++++-------------------- src/wrap_cl_part_1.cpp | 36 +++--- src/wrap_cl_part_2.cpp | 53 +++++---- src/wrap_helpers.hpp | 51 +++++---- src/wrap_mempool.cpp | 63 ++++++----- 10 files changed, 255 insertions(+), 235 deletions(-) diff --git a/setup.py b/setup.py index beead668..19a21b33 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,11 @@ def get_config_schema(): IncludeDir, LibraryDir, Libraries, \ Switch, StringListOption - default_cxxflags = [] + default_cxxflags = [ + # Required for pybind11: + # https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes + "-fvisibility=hidden" + ] if 'darwin' in sys.platform: import platform @@ -219,12 +223,12 @@ def main(): ext_modules=[ NumpyExtension("_cl", [ - #"src/wrap_cl.cpp", + "src/wrap_cl.cpp", #"src/wrap_cl_part_1.cpp", #"src/wrap_cl_part_2.cpp", #"src/wrap_constants.cpp", "src/wrap_mempool.cpp", - #"src/bitlog.cpp", + "src/bitlog.cpp", ], include_dirs=INCLUDE_DIRS, library_dirs=conf["CL_LIB_DIR"], diff --git a/src/mempool.hpp b/src/mempool.hpp index 2895e1d0..22b582fd 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -135,7 +135,7 @@ namespace PYGPU_PACKAGE return *new_bin; } else - return *it->second; + return it->second; } void inc_held_blocks() @@ -241,7 +241,7 @@ namespace PYGPU_PACKAGE { for (bin_pair_t bin_pair: m_container) { - bin_t &bin = *bin_pair.second; + bin_t &bin = bin_pair.second; while (bin.size()) { @@ -272,7 +272,7 @@ namespace PYGPU_PACKAGE // free largest stuff first for (bin_pair_t bin_pair: reverse(m_container)) { - bin_t &bin = *bin_pair.second; + bin_t &bin = bin_pair.second; if (bin.size()) { diff --git a/src/numpy_init.hpp b/src/numpy_init.hpp index 146cae65..2b54a2a5 100644 --- a/src/numpy_init.hpp +++ 
b/src/numpy_init.hpp @@ -2,6 +2,7 @@ #define _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP +// #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include #include diff --git a/src/tools.hpp b/src/tools.hpp index dcd1b50e..30f4b08f 100644 --- a/src/tools.hpp +++ b/src/tools.hpp @@ -28,10 +28,7 @@ namespace pyopencl { namespace py = pybind11; - py::object gc_mod( - py::handle<>( - PyImport_ImportModule("gc"))); - gc_mod.attr("collect")(); + py::module::import("gc").attr("collect")(); } diff --git a/src/wrap_cl.cpp b/src/wrap_cl.cpp index 0b231ad0..29b546e7 100644 --- a/src/wrap_cl.cpp +++ b/src/wrap_cl.cpp @@ -8,17 +8,17 @@ using namespace pyopencl; -extern void pyopencl_expose_constants(); -extern void pyopencl_expose_part_1(); -extern void pyopencl_expose_part_2(); -extern void pyopencl_expose_mempool(); +extern void pyopencl_expose_constants(py::module &m); +extern void pyopencl_expose_part_1(py::module &m); +extern void pyopencl_expose_part_2(py::module &m); +extern void pyopencl_expose_mempool(py::module &m); -PYBIND11_MODULE(_cl) +PYBIND11_MODULE(_cl, m) { - pyopencl_expose_constants(); - pyopencl_expose_part_1(); - pyopencl_expose_part_2(); - pyopencl_expose_mempool(); + pyopencl_expose_constants(m); + pyopencl_expose_part_1(m); + pyopencl_expose_part_2(m); + pyopencl_expose_mempool(m); } // vim: foldmethod=marker diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index e5add471..4eb86bd8 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -70,7 +70,13 @@ #if PY_VERSION_HEX >= 0x03000000 #define PYOPENCL_USE_NEW_BUFFER_INTERFACE +#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) std::move(s) +#else +#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) (s) #endif + + + // }}} @@ -137,7 +143,7 @@ { \ PYTHON_FOREACH(py_dev, py_devices) \ devices_vec.push_back( \ - py::extract(py_dev)().data()); \ + (py_dev).cast().data()); \ num_devices = devices_vec.size(); \ devices = devices_vec.empty( ) ? 
NULL : &devices_vec.front(); \ } \ @@ -301,7 +307,7 @@ (FIRST_ARG, SECOND_ARG, param_value_size, \ param_value.empty( ) ? NULL : ¶m_value.front(), ¶m_value_size)); \ \ - return py::object( \ + return py::cast( \ param_value.empty( ) ? "" : std::string(¶m_value.front(), param_value_size-1)); \ } @@ -313,7 +319,7 @@ TYPE param_value; \ PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ - return py::object(param_value); \ + return py::cast(param_value); \ } // }}} @@ -328,7 +334,7 @@ event_wait_list.resize(len(py_wait_for)); \ PYTHON_FOREACH(evt, py_wait_for) \ event_wait_list[num_events_in_wait_list++] = \ - py::extract(evt)().data(); \ + evt.cast().data(); \ } #define PYOPENCL_WAITLIST_ARGS \ @@ -409,7 +415,7 @@ namespace pyopencl // {{{ buffer interface helper // #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE - class py_buffer_wrapper : public boost::noncopyable + class py_buffer_wrapper : public noncopyable { private: bool m_initialized; @@ -450,7 +456,8 @@ namespace pyopencl // {{{ platform - class platform : boost::noncopyable + + class platform : noncopyable { private: cl_platform_id m_platform; @@ -515,8 +522,10 @@ namespace pyopencl // }}} + // {{{ device - class device : boost::noncopyable + + class device : noncopyable { public: enum reference_type_t { @@ -593,7 +602,7 @@ namespace pyopencl #if PYOPENCL_CL_VERSION >= 0x1020 else if (m_ref_type == REF_CL_1_2) - PYOPENCL_CALL_GUARDED(clReleaseDevice, (m_device)); + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseDevice, (m_device)); #endif } @@ -913,8 +922,10 @@ namespace pyopencl // }}} + // {{{ context - class context : public boost::noncopyable + + class context : public noncopyable { private: cl_context m_context; @@ -991,7 +1002,7 @@ namespace pyopencl case CL_WGL_HDC_KHR: case CL_CGL_SHAREGROUP_KHR: #endif - value = py::object(result[i+1]); + value = py::cast(result[i+1]); break; #endif @@ -1021,8 +1032,6 @@ namespace pyopencl }; - - inline std::vector 
parse_context_properties( py::object py_properties) @@ -1031,26 +1040,27 @@ namespace pyopencl if (py_properties.ptr() != Py_None) { - PYTHON_FOREACH(prop_tuple, py_properties) + PYTHON_FOREACH(prop_tuple_py, py_properties) { + py::tuple prop_tuple(prop_tuple_py.cast()); + if (len(prop_tuple) != 2) throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2"); - cl_context_properties prop = - py::extract(prop_tuple[0]); + cl_context_properties prop = prop_tuple[0].cast(); props.push_back(prop); if (prop == CL_CONTEXT_PLATFORM) { - py::extract value(prop_tuple[1]); props.push_back( - reinterpret_cast(value().data())); + reinterpret_cast( + prop_tuple[1].cast().data())); } #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) #if defined(_WIN32) else if (prop == CL_WGL_HDC_KHR) { // size_t is a stand-in for HANDLE, hopefully has the same size. - size_t hnd = py::extract(prop_tuple[1]); + size_t hnd = (prop_tuple[1]).cast(); props.push_back(hnd); } #endif @@ -1065,11 +1075,10 @@ namespace pyopencl #endif ) { - py::object ctypes = py::import("ctypes"); + py::object ctypes = py::module::import("ctypes"); py::object prop = prop_tuple[1], c_void_p = ctypes.attr("c_void_p"); py::object ptr = ctypes.attr("cast")(prop, c_void_p); - py::extract value(ptr.attr("value")); - props.push_back(value); + props.push_back(ptr.attr("value").cast()); } #endif else @@ -1082,8 +1091,6 @@ namespace pyopencl } - - inline context *create_context_inner(py::object py_devices, py::object py_properties, py::object py_dev_type) @@ -1107,10 +1114,7 @@ namespace pyopencl std::vector devices; PYTHON_FOREACH(py_dev, py_devices) - { - py::extract dev(py_dev); - devices.push_back(dev().data()); - } + devices.push_back(py_dev.cast().data()); PYOPENCL_PRINT_CALL_TRACE("clCreateContext"); ctx = clCreateContext( @@ -1124,7 +1128,7 @@ namespace pyopencl { cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; if (py_dev_type.ptr() != Py_None) - dev_type = 
py::extract(py_dev_type)(); + dev_type = py_dev_type.cast(); PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType"); ctx = clCreateContextFromType(props_ptr, dev_type, 0, 0, &status_code); @@ -1145,8 +1149,6 @@ namespace pyopencl } - - inline context *create_context(py::object py_devices, py::object py_properties, py::object py_dev_type) @@ -1156,13 +1158,11 @@ namespace pyopencl ) } - - - - // }}} + // {{{ command_queue + class command_queue { private: @@ -1271,8 +1271,10 @@ namespace pyopencl // }}} + // {{{ event/synchronization - class event : boost::noncopyable + + class event : noncopyable { private: cl_event m_event; @@ -1360,7 +1362,7 @@ namespace pyopencl public: nanny_event(cl_event evt, bool retain, std::unique_ptr &ward) - : event(evt, retain), m_ward(ward) + : event(evt, retain), m_ward(std::move(ward)) { } ~nanny_event() @@ -1370,8 +1372,7 @@ namespace pyopencl { if (m_ward.get()) { - return py::object(py::handle<>(py::borrowed( - m_ward->m_buf.obj))); + return py::reinterpret_borrow(m_ward->m_buf.obj); } else return py::object(); @@ -1427,7 +1428,7 @@ namespace pyopencl PYTHON_FOREACH(evt, events) event_wait_list[num_events_in_wait_list++] = - py::extract(evt)().data(); + evt.cast().data(); PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, ( PYOPENCL_WAITLIST_ARGS)); @@ -1485,8 +1486,7 @@ namespace pyopencl std::vector event_list(len(py_events)); PYTHON_FOREACH(py_evt, py_events) - event_list[num_events++] = - py::extract(py_evt)().data(); + event_list[num_events++] = py_evt.cast().data(); PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( cq.data(), num_events, event_list.empty( ) ? 
NULL : &event_list.front())); @@ -1543,6 +1543,7 @@ namespace pyopencl // }}} + // {{{ memory_object py::object create_mem_object_wrapper(cl_mem mem); @@ -1568,7 +1569,7 @@ namespace pyopencl - class memory_object : boost::noncopyable, public memory_object_holder + class memory_object : noncopyable, public memory_object_holder { public: #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE @@ -1589,11 +1590,12 @@ namespace pyopencl if (retain) PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem)); - m_hostbuf = hostbuf; + m_hostbuf = PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf); } memory_object(memory_object &src) - : m_valid(true), m_mem(src.m_mem), m_hostbuf(src.m_hostbuf) + : m_valid(true), m_mem(src.m_mem), + m_hostbuf(PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(src.m_hostbuf)) { PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); } @@ -1623,10 +1625,7 @@ namespace pyopencl { #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE if (m_hostbuf.get()) - { - return py::object(py::handle<>(py::borrowed( - m_hostbuf->m_buf.obj))); - } + return py::reinterpret_borrow(m_hostbuf->m_buf.obj); else return py::object(); #else @@ -1651,7 +1650,7 @@ namespace pyopencl std::vector mem_objects; PYTHON_FOREACH(mo, py_mem_objects) - mem_objects.push_back(py::extract(mo)().data()); + mem_objects.push_back(mo.cast().data()); cl_event evt; PYOPENCL_RETRY_IF_MEM_ERROR( @@ -1692,7 +1691,7 @@ namespace pyopencl std::vector mem_objects; PYTHON_FOREACH(mo, py_mem_objects) - mem_objects.push_back(py::extract(mo)().data()); + mem_objects.push_back(mo.cast().data()); cl_event evt; PYOPENCL_RETRY_IF_MEM_ERROR( @@ -1709,6 +1708,7 @@ namespace pyopencl // }}} + // {{{ buffer inline cl_mem create_buffer( @@ -1778,7 +1778,7 @@ namespace pyopencl { public: buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) - : memory_object(mem, retain, hostbuf) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) { } #if PYOPENCL_CL_VERSION >= 0x1010 @@ -1914,7 +1914,7 @@ namespace pyopencl try { - return new buffer(mem, 
false, retained_buf_obj); + return new buffer(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); } catch (...) { @@ -2252,13 +2252,14 @@ namespace pyopencl // }}} + // {{{ image class image : public memory_object { public: image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) - : memory_object(mem, retain, hostbuf) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) { } py::object get_image_info(cl_image_info param_name) const @@ -2402,8 +2403,8 @@ namespace pyopencl context const &ctx, cl_mem_flags flags, cl_image_format const &fmt, - py::object shape, - py::object pitches, + py::tuple shape, + py::tuple pitches, py::object buffer) { if (shape.ptr() == Py_None) @@ -2458,8 +2459,8 @@ namespace pyopencl cl_mem mem; if (dims == 2) { - size_t width = py::extract(shape[0]); - size_t height = py::extract(shape[1]); + size_t width = (shape[0]).cast(); + size_t height = (shape[1]).cast(); size_t pitch = 0; if (pitches.ptr() != Py_None) @@ -2467,7 +2468,7 @@ namespace pyopencl if (py::len(pitches) != 1) throw pyopencl::error("Image", CL_INVALID_VALUE, "invalid length of pitch tuple"); - pitch = py::extract(pitches[0]); + pitch = (pitches[0]).cast(); } // check buffer size @@ -2488,9 +2489,9 @@ namespace pyopencl } else if (dims == 3) { - size_t width = py::extract(shape[0]); - size_t height = py::extract(shape[1]); - size_t depth = py::extract(shape[2]); + size_t width = (shape[0]).cast(); + size_t height = (shape[1]).cast(); + size_t depth = (shape[2]).cast(); size_t pitch_x = 0; size_t pitch_y = 0; @@ -2501,8 +2502,8 @@ namespace pyopencl throw pyopencl::error("Image", CL_INVALID_VALUE, "invalid length of pitch tuple"); - pitch_x = py::extract(pitches[0]); - pitch_y = py::extract(pitches[1]); + pitch_x = (pitches[0]).cast(); + pitch_y = (pitches[1]).cast(); } // check buffer size @@ -2533,7 +2534,7 @@ namespace pyopencl try { - return new image(mem, false, retained_buf_obj); + return new image(mem, false, 
PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); } catch (...) { @@ -2612,7 +2613,7 @@ namespace pyopencl try { - return new image(mem, false, retained_buf_obj); + return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); } catch (...) { @@ -2849,6 +2850,7 @@ namespace pyopencl // }}} + // {{{ maps class memory_map { @@ -2911,7 +2913,7 @@ namespace pyopencl for (npy_intp sdim: shape) size_in_bytes *= sdim; - py::handle<> result; + py::object result; cl_event evt; cl_int status_code; @@ -2937,14 +2939,14 @@ namespace pyopencl std::unique_ptr map; try { - result = py::handle<>(PyArray_NewFromDescr( + result = py::object(py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, shape.size(), shape.empty() ? NULL : &shape.front(), strides.empty() ? NULL : &strides.front(), - mapped, ary_flags, /*obj*/NULL)); + mapped, ary_flags, /*obj*/NULL))); - if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.get())) + if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.ptr())) throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, "miscalculated numpy array size (not contiguous?)"); @@ -2957,9 +2959,9 @@ namespace pyopencl throw; } - py::handle<> map_py(handle_from_new_ptr(map.release())); - PyArray_BASE(result.get()) = map_py.get(); - Py_INCREF(map_py.get()); + py::object map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.ptr()) = map_py.ptr(); + Py_INCREF(map_py.ptr()); return py::make_tuple( result, @@ -3020,16 +3022,16 @@ namespace pyopencl throw; } - py::handle<> result = py::handle<>(PyArray_NewFromDescr( + py::object result = py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, shape.size(), shape.empty() ? NULL : &shape.front(), strides.empty() ? 
NULL : &strides.front(), mapped, ary_flags, /*obj*/NULL)); - py::handle<> map_py(handle_from_new_ptr(map.release())); - PyArray_BASE(result.get()) = map_py.get(); - Py_INCREF(map_py.get()); + py::object map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.ptr()) = map_py.ptr(); + Py_INCREF(map_py.ptr()); return py::make_tuple( result, @@ -3039,8 +3041,10 @@ namespace pyopencl // }}} + // {{{ sampler - class sampler : boost::noncopyable + + class sampler : noncopyable { private: cl_sampler m_sampler; @@ -3107,9 +3111,10 @@ namespace pyopencl // }}} + // {{{ program - class program : boost::noncopyable + class program : noncopyable { public: enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY }; @@ -3183,7 +3188,7 @@ namespace pyopencl size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); - boost::scoped_array result( + std::unique_ptr result( new unsigned char[total_size]); std::vector result_ptrs; @@ -3202,7 +3207,8 @@ namespace pyopencl ptr = result.get(); for (unsigned i = 0; i < sizes.size(); ++i) { - py::handle<> binary_pyobj( + py::object binary_pyobj( + py::reinterpret_steal( #if PY_VERSION_HEX >= 0x03000000 PyBytes_FromStringAndSize( reinterpret_cast(ptr), sizes[i]) @@ -3210,7 +3216,7 @@ namespace pyopencl PyString_FromStringAndSize( reinterpret_cast(ptr), sizes[i]) #endif - ); + )); py_result.append(binary_pyobj); ptr += sizes[i]; } @@ -3278,13 +3284,14 @@ namespace pyopencl std::vector header_names; std::vector programs; - PYTHON_FOREACH(name_hdr_tup, py_headers) + PYTHON_FOREACH(name_hdr_tup_py, py_headers) { + py::tuple name_hdr_tup = py::reinterpret_borrow(name_hdr_tup_py); if (py::len(name_hdr_tup) != 2) throw error("Program.compile", CL_INVALID_VALUE, "epxected (name, header) tuple in headers list"); - std::string name = py::extract(name_hdr_tup[0]); - program &prg = py::extract(name_hdr_tup[1]); + std::string name = (name_hdr_tup[0]).cast(); + program &prg = (name_hdr_tup[1]).cast(); header_names.push_back(name); 
programs.push_back(prg.data()); @@ -3342,23 +3349,23 @@ namespace pyopencl inline program *create_program_with_binary( context &ctx, - py::object py_devices, - py::object py_binaries) + py::sequence py_devices, + py::sequence py_binaries) { std::vector devices; std::vector binaries; std::vector sizes; std::vector binary_statuses; - int num_devices = len(py_devices); + size_t num_devices = len(py_devices); if (len(py_binaries) != num_devices) throw error("create_program_with_binary", CL_INVALID_VALUE, "device and binary counts don't match"); - for (int i = 0; i < num_devices; ++i) + for (size_t i = 0; i < num_devices; ++i) { devices.push_back( - py::extract(py_devices[i])().data()); + (py_devices[i]).cast().data()); const void *buf; PYOPENCL_BUFFER_SIZE_T len; @@ -3457,7 +3464,7 @@ namespace pyopencl std::vector programs; PYTHON_FOREACH(py_prg, py_programs) { - program &prg = py::extract(py_prg); + program &prg = (py_prg).cast(); programs.push_back(prg.data()); } @@ -3498,6 +3505,7 @@ namespace pyopencl // }}} + // {{{ kernel class local_memory { @@ -3516,7 +3524,7 @@ namespace pyopencl - class kernel : boost::noncopyable + class kernel : noncopyable { private: cl_kernel m_kernel; @@ -3591,7 +3599,7 @@ namespace pyopencl { buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS); } - catch (py::error_already_set) + catch (py::error_already_set &) { PyErr_Clear(); throw error("Kernel.set_arg", CL_INVALID_VALUE, @@ -3621,26 +3629,26 @@ namespace pyopencl return; } - py::extract ex_mo(arg); - if (ex_mo.check()) + try { - set_arg_mem(arg_index, ex_mo()); + set_arg_mem(arg_index, arg.cast()); return; } + catch (py::cast_error &) { } - py::extract ex_loc(arg); - if (ex_loc.check()) + try { - set_arg_local(arg_index, ex_loc()); + set_arg_local(arg_index, arg.cast()); return; } + catch (py::cast_error &) { } - py::extract ex_smp(arg); - if (ex_smp.check()) + try { - set_arg_sampler(arg_index, ex_smp()); + set_arg_sampler(arg_index, arg.cast()); return; } + catch 
(py::cast_error &) { } set_arg_buf(arg_index, arg); } @@ -3866,6 +3874,8 @@ namespace pyopencl // }}} + +#if 0 // {{{ gl interop inline bool have_gl() @@ -4026,7 +4036,7 @@ namespace pyopencl \ std::vector mem_objects; \ PYTHON_FOREACH(mo, py_mem_objects) \ - mem_objects.push_back(py::extract(mo)().data()); \ + mem_objects.push_back((mo).cast()().data()); \ \ cl_event evt; \ PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ @@ -4068,7 +4078,7 @@ namespace pyopencl #if PYOPENCL_CL_VERSION >= 0x1020 if (py_platform.ptr() != Py_None) { - platform &plat = py::extract(py_platform); + platform &plat = (py_platform).cast(); func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform( plat.data(), "clGetGLContextInfoKHR"); @@ -4134,6 +4144,8 @@ namespace pyopencl #endif // }}} +#endif + // {{{ deferred implementation bits @@ -4223,13 +4235,13 @@ namespace pyopencl } inline - py::handle<> get_mem_obj_host_array( + py::object get_mem_obj_host_array( py::object mem_obj_py, py::object shape, py::object dtype, py::object order_py) { memory_object_holder const &mem_obj = - py::extract(mem_obj_py); + (mem_obj_py).cast(); PyArray_Descr *tp_descr; if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) throw py::error_already_set(); @@ -4241,16 +4253,16 @@ namespace pyopencl "Only MemoryObject with USE_HOST_PTR " "is supported."); - py::extract shape_as_int(shape); std::vector dims; - - if (shape_as_int.check()) - dims.push_back(shape_as_int()); - else - std::copy( - py::stl_input_iterator(shape), - py::stl_input_iterator(), - back_inserter(dims)); + try + { + dims.push_back(py::cast(shape)); + } + catch (py::cast_error &) + { + for (auto it: shape) + dims.push_back(it.cast()); + } NPY_ORDER order = PyArray_CORDER; PyArray_OrderConverter(order_py.ptr(), &order); @@ -4272,24 +4284,23 @@ namespace pyopencl (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size), &mem_obj_size, 0)); - py::handle<> result = py::handle<>(PyArray_NewFromDescr( + py::object result = 
py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, dims.size(), &dims.front(), /*strides*/ NULL, host_ptr, ary_flags, /*obj*/NULL)); - if ((size_t) PyArray_NBYTES(result.get()) > mem_obj_size) + if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size) throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE, "Resulting array is larger than memory object."); - PyArray_BASE(result.get()) = mem_obj_py.ptr(); + PyArray_BASE(result.ptr()) = mem_obj_py.ptr(); Py_INCREF(mem_obj_py.ptr()); return result; } // }}} - } diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index cff0734f..14ec060c 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -8,12 +8,12 @@ using namespace pyopencl; -void pyopencl_expose_part_1() +void pyopencl_expose_part_1(py::module &m) { py::docstring_options doc_op; doc_op.disable_cpp_signatures(); - py::def("get_cl_header_version", get_cl_header_version); + m.def("get_cl_header_version", get_cl_header_version); // {{{ platform DEF_SIMPLE_FUNCTION(get_platforms); @@ -125,22 +125,22 @@ void pyopencl_expose_part_1() DEF_SIMPLE_FUNCTION(wait_for_events); #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, + m.def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, (py::arg("queue"), py::arg("wait_for")=py::object()), py::return_value_policy()); #endif - py::def("_enqueue_marker", enqueue_marker, + m.def("_enqueue_marker", enqueue_marker, (py::arg("queue")), py::return_value_policy()); - py::def("_enqueue_wait_for_events", enqueue_wait_for_events, + m.def("_enqueue_wait_for_events", enqueue_wait_for_events, (py::arg("queue"), py::arg("wait_for")=py::object())); #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, + m.def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, (py::arg("queue"), py::arg("wait_for")=py::object()), py::return_value_policy()); 
#endif - py::def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); + m.def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); #if PYOPENCL_CL_VERSION >= 0x1010 { @@ -168,7 +168,7 @@ void pyopencl_expose_part_1() .def(py::self != py::self) .def("__hash__", &cls::hash) - .add_property("int_ptr", to_int_ptr, + .def_property("int_ptr", to_int_ptr, "Return an integer corresponding to the pointer value " "of the underlying :c:type:`cl_mem`. " "Use :meth:`from_int_ptr` to turn back into a Python object." @@ -180,7 +180,7 @@ void pyopencl_expose_part_1() py::class_ >( "MemoryObject", py::no_init) .DEF_SIMPLE_METHOD(release) - .add_property("hostbuf", &cls::hostbuf) + .def_property("hostbuf", &cls::hostbuf) .def("from_int_ptr", memory_object_from_int, "(static method) Return a new Python object referencing the C-level " \ @@ -197,7 +197,7 @@ void pyopencl_expose_part_1() } #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("enqueue_migrate_mem_objects", enqueue_migrate_mem_objects, + m.def("enqueue_migrate_mem_objects", enqueue_migrate_mem_objects, (py::args("queue", "mem_objects"), py::arg("flags")=0, py::arg("wait_for")=py::object() @@ -206,7 +206,7 @@ void pyopencl_expose_part_1() #endif #ifdef cl_ext_migrate_memobject - py::def("enqueue_migrate_mem_object_ext", enqueue_migrate_mem_object_ext, + m.def("enqueue_migrate_mem_object_ext", enqueue_migrate_mem_object_ext, (py::args("queue", "mem_objects"), py::arg("flags")=0, py::arg("wait_for")=py::object() @@ -241,21 +241,21 @@ void pyopencl_expose_part_1() // {{{ transfers // {{{ byte-for-byte - py::def("_enqueue_read_buffer", enqueue_read_buffer, + m.def("_enqueue_read_buffer", enqueue_read_buffer, (py::args("queue", "mem", "hostbuf"), py::arg("device_offset")=0, py::arg("wait_for")=py::object(), py::arg("is_blocking")=true ), py::return_value_policy()); - py::def("_enqueue_write_buffer", enqueue_write_buffer, + m.def("_enqueue_write_buffer", enqueue_write_buffer, (py::args("queue", "mem", "hostbuf"), 
py::arg("device_offset")=0, py::arg("wait_for")=py::object(), py::arg("is_blocking")=true ), py::return_value_policy()); - py::def("_enqueue_copy_buffer", enqueue_copy_buffer, + m.def("_enqueue_copy_buffer", enqueue_copy_buffer, (py::args("queue", "src", "dst"), py::arg("byte_count")=-1, py::arg("src_offset")=0, @@ -269,7 +269,7 @@ void pyopencl_expose_part_1() // {{{ rectangular #if PYOPENCL_CL_VERSION >= 0x1010 - py::def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, + m.def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, (py::args("queue", "mem", "hostbuf", "buffer_origin", "host_origin", "region"), py::arg("buffer_pitches")=py::object(), @@ -278,7 +278,7 @@ void pyopencl_expose_part_1() py::arg("is_blocking")=true ), py::return_value_policy()); - py::def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, + m.def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, (py::args("queue", "mem", "hostbuf", "buffer_origin", "host_origin", "region"), py::arg("buffer_pitches")=py::object(), @@ -287,7 +287,7 @@ void pyopencl_expose_part_1() py::arg("is_blocking")=true ), py::return_value_policy()); - py::def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, + m.def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, (py::args("queue", "src", "dst", "src_origin", "dst_origin", "region"), py::arg("src_pitches")=py::object(), @@ -302,7 +302,7 @@ void pyopencl_expose_part_1() // }}} #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("_enqueue_fill_buffer", enqueue_fill_buffer, + m.def("_enqueue_fill_buffer", enqueue_fill_buffer, (py::args("queue", "mem", "pattern", "offset", "size"), py::arg("wait_for")=py::object()), py::return_value_policy()); diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 4d010796..3ebc5a65 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -45,32 +45,29 @@ using namespace pyopencl; -void pyopencl_expose_part_2() +void pyopencl_expose_part_2(py::module &m) { - py::docstring_options doc_op; - 
doc_op.disable_cpp_signatures(); - // {{{ image #if PYOPENCL_CL_VERSION >= 0x1020 { typedef cl_image_desc cls; - py::class_("ImageDescriptor") + py::class_(m, "ImageDescriptor") .def_readwrite("image_type", &cls::image_type) - .add_property("shape", &image_desc_dummy_getter, image_desc_set_shape) + .def_property("shape", &image_desc_dummy_getter, image_desc_set_shape) .def_readwrite("array_size", &cls::image_array_size) - .add_property("pitches", &image_desc_dummy_getter, image_desc_set_pitches) + .def_property("pitches", &image_desc_dummy_getter, image_desc_set_pitches) .def_readwrite("num_mip_levels", &cls::num_mip_levels) .def_readwrite("num_samples", &cls::num_samples) - .add_property("buffer", &image_desc_dummy_getter, image_desc_set_buffer) + .def_property("buffer", &image_desc_dummy_getter, image_desc_set_buffer) ; } #endif { typedef image cls; - py::class_, boost::noncopyable>( - "Image", py::no_init) + py::class_/* , boost::noncopyable */>( + m, "Image"/* , py::no_init */) .def("__init__", make_constructor(create_image, py::default_call_policies(), (py::args("context", "flags", "format"), @@ -94,15 +91,15 @@ void pyopencl_expose_part_2() .def("__init__", py::make_constructor(make_image_format)) .def_readwrite("channel_order", &cls::image_channel_order) .def_readwrite("channel_data_type", &cls::image_channel_data_type) - .add_property("channel_count", &get_image_format_channel_count) - .add_property("dtype_size", &get_image_format_channel_dtype_size) - .add_property("itemsize", &get_image_format_item_size) + .def_property("channel_count", &get_image_format_channel_count) + .def_property("dtype_size", &get_image_format_channel_dtype_size) + .def_property("itemsize", &get_image_format_item_size) ; } DEF_SIMPLE_FUNCTION(get_supported_image_formats); - py::def("_enqueue_read_image", enqueue_read_image, + m.def("_enqueue_read_image", enqueue_read_image, (py::args("queue", "mem", "origin", "region", "hostbuf"), py::arg("row_pitch")=0, py::arg("slice_pitch")=0, @@ 
-110,7 +107,7 @@ void pyopencl_expose_part_2() py::arg("is_blocking")=true ), py::return_value_policy()); - py::def("_enqueue_write_image", enqueue_write_image, + m.def("_enqueue_write_image", enqueue_write_image, (py::args("queue", "mem", "origin", "region", "hostbuf"), py::arg("row_pitch")=0, py::arg("slice_pitch")=0, @@ -119,21 +116,21 @@ void pyopencl_expose_part_2() ), py::return_value_policy()); - py::def("_enqueue_copy_image", enqueue_copy_image, + m.def("_enqueue_copy_image", enqueue_copy_image, (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"), py::arg("wait_for")=py::object()), py::return_value_policy()); - py::def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, + m.def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, (py::args("queue", "src", "dest", "origin", "region", "offset"), py::arg("wait_for")=py::object()), py::return_value_policy()); - py::def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, + m.def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, (py::args("queue", "src", "dest", "offset", "origin", "region"), py::arg("wait_for")=py::object()), py::return_value_policy()); #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("enqueue_fill_image", enqueue_write_image, + m.def("enqueue_fill_image", enqueue_write_image, (py::args("queue", "mem", "color", "origin", "region"), py::arg("wait_for")=py::object()), py::return_value_policy()); @@ -151,7 +148,7 @@ void pyopencl_expose_part_2() ; } - py::def("enqueue_map_buffer", enqueue_map_buffer, + m.def("enqueue_map_buffer", enqueue_map_buffer, (py::args("queue", "buf", "flags", "offset", "shape", "dtype"), @@ -159,7 +156,7 @@ void pyopencl_expose_part_2() py::arg("strides")=py::object(), py::arg("wait_for")=py::object(), py::arg("is_blocking")=true)); - py::def("enqueue_map_image", enqueue_map_image, + m.def("enqueue_map_image", enqueue_map_image, (py::args("queue", "img", "flags", "origin", "region", "shape", "dtype"), @@ -237,7 
+234,7 @@ void pyopencl_expose_part_2() } #if PYOPENCL_CL_VERSION >= 0x1020 - py::def("unload_platform_compiler", unload_platform_compiler); + m.def("unload_platform_compiler", unload_platform_compiler); #endif // }}} @@ -265,12 +262,12 @@ void pyopencl_expose_part_2() typedef local_memory cls; py::class_("LocalMemory", py::init(py::arg("size"))) - .add_property("size", &cls::size) + .def_property("size", &cls::size) ; } - py::def("enqueue_nd_range_kernel", enqueue_nd_range_kernel, + m.def("enqueue_nd_range_kernel", enqueue_nd_range_kernel, (py::args("queue", "kernel"), py::arg("global_work_size"), py::arg("local_work_size"), @@ -279,7 +276,7 @@ void pyopencl_expose_part_2() py::arg("g_times_l")=false ), py::return_value_policy()); - py::def("enqueue_task", enqueue_task, + m.def("enqueue_task", enqueue_task, (py::args("queue", "kernel"), py::arg("wait_for")=py::object() ), @@ -333,19 +330,19 @@ void pyopencl_expose_part_2() ; } - py::def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, + m.def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, (py::args("queue", "mem_objects"), py::arg("wait_for")=py::object() ), py::return_value_policy()); - py::def("enqueue_release_gl_objects", enqueue_release_gl_objects, + m.def("enqueue_release_gl_objects", enqueue_release_gl_objects, (py::args("queue", "mem_objects"), py::arg("wait_for")=py::object() ), py::return_value_policy()); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - py::def("get_gl_context_info_khr", get_gl_context_info_khr, + m.def("get_gl_context_info_khr", get_gl_context_info_khr, (py::args("properties", "param_name"), py::arg("platform")=py::object())); #endif diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index 866e8002..6afaa73d 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -20,14 +20,17 @@ namespace py = pybind11; #define DEF_SIMPLE_METHOD(NAME) \ def(#NAME, &cls::NAME) +#define DEF_SIMPLE_STATIC_METHOD(NAME) \ + def_static(#NAME, &cls::NAME) + #define 
DEF_SIMPLE_METHOD_WITH_ARGS(NAME, ARGS) \ def(#NAME, &cls::NAME, boost::python::args ARGS) #define DEF_SIMPLE_FUNCTION(NAME) \ - boost::python::def(#NAME, &NAME) + m.def(#NAME, &NAME) #define DEF_SIMPLE_FUNCTION_WITH_ARGS(NAME, ARGS) \ - boost::python::def(#NAME, &NAME, boost::python::args ARGS) + m.def(#NAME, &NAME, py::args ARGS) #define DEF_SIMPLE_RO_MEMBER(NAME) \ def_readonly(#NAME, &cls::m_##NAME) @@ -36,43 +39,46 @@ namespace py = pybind11; def_readwrite(#NAME, &cls::m_##NAME) #define PYTHON_FOREACH(NAME, ITERABLE) \ - for (py::object NAME: ITERABLE) + for (py::handle NAME: ITERABLE) #define COPY_PY_LIST(TYPE, NAME) \ - std::copy( \ - boost::python::stl_input_iterator(py_##NAME), \ - boost::python::stl_input_iterator(), \ - std::back_inserter(NAME)); + { \ + for (auto it: py_##NAME) \ + NAME.push_back(it.cast()); \ + } #define COPY_PY_COORD_TRIPLE(NAME) \ size_t NAME[3] = {0, 0, 0}; \ { \ - size_t my_len = len(py_##NAME); \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ if (my_len > 3) \ throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ for (size_t i = 0; i < my_len; ++i) \ - NAME[i] = py::extract(py_##NAME[i])(); \ + NAME[i] = py_tup_##NAME[i].cast(); \ } #define COPY_PY_PITCH_TUPLE(NAME) \ size_t NAME[2] = {0, 0}; \ if (py_##NAME.ptr() != Py_None) \ { \ - size_t my_len = len(py_##NAME); \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ if (my_len > 2) \ throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ for (size_t i = 0; i < my_len; ++i) \ - NAME[i] = py::extract(py_##NAME[i])(); \ + NAME[i] = py_tup_##NAME[i].cast(); \ } #define COPY_PY_REGION_TRIPLE(NAME) \ size_t NAME[3] = {1, 1, 1}; \ { \ - size_t my_len = len(py_##NAME); \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ if (my_len > 3) \ throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ for (size_t i = 0; i < my_len; 
++i) \ - NAME[i] = py::extract(py_##NAME[i])(); \ + NAME[i] = py_tup_##NAME[i].cast(); \ } #define PYOPENCL_PARSE_NUMPY_ARRAY_SPEC \ @@ -80,13 +86,15 @@ namespace py = pybind11; if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) \ throw py::error_already_set(); \ \ - py::extract shape_as_int(py_shape); \ std::vector shape; \ - \ - if (shape_as_int.check()) \ - shape.push_back(shape_as_int()); \ - else \ + try \ + { \ + shape.push_back(py_shape.cast()); \ + } \ + catch (py::cast_error &) \ + { \ COPY_PY_LIST(npy_intp, shape); \ + } \ \ NPY_ORDER order = PyArray_CORDER; \ PyArray_OrderConverter(py_order.ptr(), &order); \ @@ -108,7 +116,7 @@ namespace py = pybind11; #define PYOPENCL_RETURN_VECTOR(ITEMTYPE, NAME) \ { \ py::list pyopencl_result; \ - BOOST_FOREACH(ITEMTYPE item, NAME) \ + for (ITEMTYPE item: NAME) \ pyopencl_result.append(item); \ return pyopencl_result; \ } @@ -116,10 +124,9 @@ namespace py = pybind11; namespace { template - inline boost::python::handle<> handle_from_new_ptr(T *ptr) + inline py::object handle_from_new_ptr(T *ptr) { - return boost::python::handle<>( - typename boost::python::manage_new_object::apply::type()(ptr)); + return py::cast(ptr); } template diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index c785d168..78fa9acd 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -5,13 +5,12 @@ #include #include #include "wrap_helpers.hpp" -// #include "wrap_cl.hpp" +#include "wrap_cl.hpp" #include "mempool.hpp" #include "tools.hpp" - namespace { class cl_allocator_base @@ -209,16 +208,14 @@ namespace template void expose_memory_pool(Wrapper &wrapper) { - typedef typename Wrapper::wrapped_type cls; + typedef typename Wrapper::type cls; wrapper - .add_property("held_blocks", &cls::held_blocks) - .add_property("active_blocks", &cls::active_blocks) - .DEF_SIMPLE_METHOD(bin_number) - .DEF_SIMPLE_METHOD(alloc_size) + .def_property("held_blocks", &cls::held_blocks) + .def_property("active_blocks", 
&cls::active_blocks) + .DEF_SIMPLE_STATIC_METHOD(bin_number) + .DEF_SIMPLE_STATIC_METHOD(alloc_size) .DEF_SIMPLE_METHOD(free_held) .DEF_SIMPLE_METHOD(stop_holding) - .staticmethod("bin_number") - .staticmethod("alloc_size") ; } } @@ -226,47 +223,53 @@ namespace -void pyopencl_expose_mempool() +void pyopencl_expose_mempool(py::module &m) { - py::def("bitlog2", pyopencl::bitlog2); + m.def("bitlog2", pyopencl::bitlog2); { typedef cl_allocator_base cls; - py::class_ wrapper("_tools_AllocatorBase", py::no_init); + py::class_ wrapper( + m, "_tools_AllocatorBase"/*, py::no_init */); wrapper - .def("__call__", allocator_call, - py::return_value_policy()) + .def("__call__", allocator_call) ; } { typedef cl_deferred_allocator cls; - py::class_ > wrapper("_tools_DeferredAllocator", - py::init< + py::class_> wrapper( + m, "_tools_DeferredAllocator"); + wrapper + .def(py::init< + std::shared_ptr const &>()) + .def(py::init< std::shared_ptr const &, - py::optional >()); + cl_mem_flags>()) + ; } { typedef cl_immediate_allocator cls; - py::class_ > wrapper("_tools_ImmediateAllocator", - py::init >()); + py::class_> wrapper( + m, "_tools_ImmediateAllocator"); + wrapper + .def(py::init()) + .def(py::init()) + ; } { typedef pyopencl::memory_pool cls; py::class_< - cls, boost::noncopyable, - std::shared_ptr > wrapper("MemoryPool", - py::init() - ); + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "MemoryPool"); wrapper - .def("allocate", device_pool_allocate, - py::return_value_policy()) - .def("__call__", device_pool_allocate, - py::return_value_policy()) + .def(py::init()) + .def("allocate", device_pool_allocate) + .def("__call__", device_pool_allocate) // undoc for now .DEF_SIMPLE_METHOD(set_trace) ; @@ -276,9 +279,9 @@ void pyopencl_expose_mempool() { typedef pooled_buffer cls; - py::class_ >( - "PooledBuffer", py::no_init) + py::class_ >( + m, "PooledBuffer"/* , py::no_init */) .def("release", &cls::free) ; } -- GitLab From 6a94765b0e68c4be15b7c2da5df432575e19ac35 
Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 7 Aug 2018 11:58:22 -0500 Subject: [PATCH 06/92] object->sequence fixes [ci skip] --- src/wrap_cl.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 4eb86bd8..c54347ac 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1594,7 +1594,7 @@ namespace pyopencl } memory_object(memory_object &src) - : m_valid(true), m_mem(src.m_mem), + : m_valid(true), m_mem(src.m_mem), m_hostbuf(PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(src.m_hostbuf)) { PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); @@ -2065,8 +2065,8 @@ namespace pyopencl py::object py_buffer_origin, py::object py_host_origin, py::object py_region, - py::object py_buffer_pitches, - py::object py_host_pitches, + py::sequence py_buffer_pitches, + py::sequence py_host_pitches, py::object py_wait_for, bool is_blocking ) @@ -2121,8 +2121,8 @@ namespace pyopencl py::object py_buffer_origin, py::object py_host_origin, py::object py_region, - py::object py_buffer_pitches, - py::object py_host_pitches, + py::sequence py_buffer_pitches, + py::sequence py_host_pitches, py::object py_wait_for, bool is_blocking ) @@ -2176,8 +2176,8 @@ namespace pyopencl py::object py_src_origin, py::object py_dst_origin, py::object py_region, - py::object py_src_pitches, - py::object py_dst_pitches, + py::sequence py_src_pitches, + py::sequence py_dst_pitches, py::object py_wait_for) { PYOPENCL_PARSE_WAIT_FOR; @@ -2403,8 +2403,8 @@ namespace pyopencl context const &ctx, cl_mem_flags flags, cl_image_format const &fmt, - py::tuple shape, - py::tuple pitches, + py::sequence shape, + py::sequence pitches, py::object buffer) { if (shape.ptr() == Py_None) -- GitLab From 906a5b481fd251d429e7f3528f7adc3edc42e28a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 7 Aug 2018 23:34:46 -0500 Subject: [PATCH 07/92] Part 1 almost builds with pybind11 --- setup.py | 2 +- src/mempool.hpp | 6 +- src/wrap_cl.hpp | 10 +- 
src/wrap_cl_part_1.cpp | 224 +++++++++++++++++++++-------------------- src/wrap_helpers.hpp | 7 +- src/wrap_mempool.cpp | 10 +- 6 files changed, 131 insertions(+), 128 deletions(-) diff --git a/setup.py b/setup.py index 19a21b33..9073178c 100644 --- a/setup.py +++ b/setup.py @@ -224,7 +224,7 @@ def main(): NumpyExtension("_cl", [ "src/wrap_cl.cpp", - #"src/wrap_cl_part_1.cpp", + "src/wrap_cl_part_1.cpp", #"src/wrap_cl_part_2.cpp", #"src/wrap_constants.cpp", "src/wrap_mempool.cpp", diff --git a/src/mempool.hpp b/src/mempool.hpp index 22b582fd..2f649e2b 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -130,9 +130,9 @@ namespace PYGPU_PACKAGE typename container_t::iterator it = m_container.find(bin_nr); if (it == m_container.end()) { - bin_t *new_bin = new bin_t; - m_container.insert(bin_nr, new_bin); - return *new_bin; + auto it_and_inserted = m_container.insert(std::make_pair(bin_nr, bin_t())); + assert(it_and_inserted.second); + return it_and_inserted.first->second; } else return it->second; diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index c54347ac..7f962fc6 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3875,7 +3875,6 @@ namespace pyopencl // }}} -#if 0 // {{{ gl interop inline bool have_gl() @@ -3910,7 +3909,7 @@ namespace pyopencl { public: gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) - : memory_object(mem, retain, hostbuf) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) { } }; @@ -3921,7 +3920,7 @@ namespace pyopencl { public: gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) - : memory_object(mem, retain, hostbuf) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) { } }; @@ -3932,7 +3931,7 @@ namespace pyopencl { public: gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) - : image(mem, retain, hostbuf) + : image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) { } py::object get_gl_texture_info(cl_gl_texture_info param_name) @@ 
-4036,7 +4035,7 @@ namespace pyopencl \ std::vector mem_objects; \ PYTHON_FOREACH(mo, py_mem_objects) \ - mem_objects.push_back((mo).cast()().data()); \ + mem_objects.push_back((mo).cast().data()); \ \ cl_event evt; \ PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ @@ -4144,7 +4143,6 @@ namespace pyopencl #endif // }}} -#endif // {{{ deferred implementation bits diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 14ec060c..a4a82a5f 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -1,18 +1,11 @@ #include "wrap_cl.hpp" - - using namespace pyopencl; - - void pyopencl_expose_part_1(py::module &m) { - py::docstring_options doc_op; - doc_op.disable_cpp_signatures(); - m.def("get_cl_header_version", get_cl_header_version); // {{{ platform @@ -20,7 +13,7 @@ void pyopencl_expose_part_1(py::module &m) { typedef platform cls; - py::class_("Platform", py::no_init) + py::class_(m, "Platform") .DEF_SIMPLE_METHOD(get_info) .def("get_devices", &cls::get_devices, py::arg("device_type")=CL_DEVICE_TYPE_ALL) @@ -36,7 +29,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ device { typedef device cls; - py::class_("Device", py::no_init) + py::class_(m, "Device") .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) .def(py::self != py::self) @@ -57,14 +50,16 @@ void pyopencl_expose_part_1(py::module &m) { typedef context cls; - py::class_ >("Context", py::no_init) - .def("__init__", make_constructor(create_context, - py::default_call_policies(), - (py::arg("devices")=py::object(), - py::arg("properties")=py::object(), - py::arg("dev_type")=py::object() - ))) + py::class_>(m, "Context") +#if 0 + .def( + py::init(create_context)/*, + py::arg("devices")=py::object(), + py::arg("properties")=py::object(), + py::arg("dev_type")=py::object() + */ + ) +#endif .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) .def(py::self != py::self) @@ -78,10 +73,13 @@ void pyopencl_expose_part_1(py::module &m) // {{{ command queue { typedef command_queue 
cls; - py::class_("CommandQueue", + py::class_(m, "CommandQueue") + .def( py::init - ((py::arg("context"), py::arg("device")=py::object(), py::arg("properties")=0))) + const device *, cl_command_queue_properties>(), + py::arg("context"), + py::arg("device")=py::object(), + py::arg("properties")=0) .DEF_SIMPLE_METHOD(get_info) #if PYOPENCL_CL_VERSION < 0x1010 .DEF_SIMPLE_METHOD(set_property) @@ -100,7 +98,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ events/synchronization { typedef event cls; - py::class_("Event", py::no_init) + py::class_(m, "Event") .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_profiling_info) .DEF_SIMPLE_METHOD(wait) @@ -108,16 +106,11 @@ void pyopencl_expose_part_1(py::module &m) .def(py::self != py::self) .def("__hash__", &cls::hash) PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_event) - - // deprecated, remove in 2015.x. - .def("from_cl_event_as_int", from_int_ptr, - py::return_value_policy()) - .staticmethod("from_cl_event_as_int") ; } { typedef nanny_event cls; - py::class_ >("NannyEvent", py::no_init) + py::class_(m, "NannyEvent") .DEF_SIMPLE_METHOD(get_ward) ; } @@ -126,28 +119,30 @@ void pyopencl_expose_part_1(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1020 m.def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, - (py::arg("queue"), py::arg("wait_for")=py::object()), - py::return_value_policy()); + py::arg("queue"), py::arg("wait_for")=py::object() + ); #endif m.def("_enqueue_marker", enqueue_marker, - (py::arg("queue")), - py::return_value_policy()); + py::arg("queue") + ); m.def("_enqueue_wait_for_events", enqueue_wait_for_events, - (py::arg("queue"), py::arg("wait_for")=py::object())); + py::arg("queue"), py::arg("wait_for")=py::object()); #if PYOPENCL_CL_VERSION >= 0x1020 m.def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, - (py::arg("queue"), py::arg("wait_for")=py::object()), - py::return_value_policy()); + py::arg("queue"), py::arg("wait_for")=py::object() + ); #endif 
m.def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); #if PYOPENCL_CL_VERSION >= 0x1010 { typedef user_event cls; - py::class_, boost::noncopyable>("UserEvent", py::no_init) + py::class_(m, "UserEvent") +#if 0 .def("__init__", make_constructor( create_user_event, py::default_call_policies(), py::args("context"))) +#endif .DEF_SIMPLE_METHOD(set_status) ; } @@ -159,16 +154,19 @@ void pyopencl_expose_part_1(py::module &m) { typedef memory_object_holder cls; - py::class_( - "MemoryObjectHolder", py::no_init) + py::class_(m, "MemoryObjectHolder") .DEF_SIMPLE_METHOD(get_info) .def("get_host_array", get_mem_obj_host_array, - (py::arg("shape"), py::arg("dtype"), py::arg("order")="C")) + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C") +#if 0 .def(py::self == py::self) .def(py::self != py::self) +#endif .def("__hash__", &cls::hash) - .def_property("int_ptr", to_int_ptr, + .def_property_readonly("int_ptr", to_int_ptr, "Return an integer corresponding to the pointer value " "of the underlying :c:type:`cl_mem`. " "Use :meth:`from_int_ptr` to turn back into a Python object." @@ -177,61 +175,57 @@ void pyopencl_expose_part_1(py::module &m) } { typedef memory_object cls; - py::class_ >( - "MemoryObject", py::no_init) + py::class_(m, "MemoryObject") .DEF_SIMPLE_METHOD(release) - .def_property("hostbuf", &cls::hostbuf) + .def_property_readonly("hostbuf", &cls::hostbuf) - .def("from_int_ptr", memory_object_from_int, + .def_static("from_int_ptr", memory_object_from_int, "(static method) Return a new Python object referencing the C-level " \ ":c:type:`cl_mem` object at the location pointed to " \ "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ "will be called." \ "\n\n.. 
versionadded:: 2013.2\n") \ - .staticmethod("from_int_ptr") - - // deprecated, remove in 2015.x - .def("from_cl_mem_as_int", memory_object_from_int) - .staticmethod("from_cl_mem_as_int") ; } #if PYOPENCL_CL_VERSION >= 0x1020 m.def("enqueue_migrate_mem_objects", enqueue_migrate_mem_objects, - (py::args("queue", "mem_objects"), - py::arg("flags")=0, - py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem_objects"), + py::arg("flags")=0, + py::arg("wait_for")=py::object() + ); #endif #ifdef cl_ext_migrate_memobject m.def("enqueue_migrate_mem_object_ext", enqueue_migrate_mem_object_ext, - (py::args("queue", "mem_objects"), - py::arg("flags")=0, - py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem_objects"), + py::arg("flags")=0, + py::arg("wait_for")=py::object() + ); #endif // }}} // {{{ buffer { typedef buffer cls; - py::class_, boost::noncopyable>( - "Buffer", py::no_init) + py::class_(m, "Buffer") +#if 0 .def("__init__", make_constructor(create_buffer_py, py::default_call_policies(), (py::args("context", "flags"), py::arg("size")=0, py::arg("hostbuf")=py::object() ))) +#endif #if PYOPENCL_CL_VERSION >= 0x1010 .def("get_sub_region", &cls::get_sub_region, - (py::args("origin", "size"), py::arg("flags")=0), - py::return_value_policy()) - .def("__getitem__", &cls::getitem, - py::return_value_policy()) + py::arg("origin"), + py::arg("size"), + py::arg("flags")=0 + ) + .def("__getitem__", &cls::getitem) #endif ; } @@ -242,27 +236,30 @@ void pyopencl_expose_part_1(py::module &m) // {{{ byte-for-byte m.def("_enqueue_read_buffer", enqueue_read_buffer, - (py::args("queue", "mem", "hostbuf"), - py::arg("device_offset")=0, - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + 
); m.def("_enqueue_write_buffer", enqueue_write_buffer, - (py::args("queue", "mem", "hostbuf"), - py::arg("device_offset")=0, - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ); m.def("_enqueue_copy_buffer", enqueue_copy_buffer, - (py::args("queue", "src", "dst"), - py::arg("byte_count")=-1, - py::arg("src_offset")=0, - py::arg("dst_offset")=0, - py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("src"), + py::arg("dst"), + py::arg("byte_count")=-1, + py::arg("src_offset")=0, + py::arg("dst_offset")=0, + py::arg("wait_for")=py::object() + ); // }}} @@ -270,31 +267,40 @@ void pyopencl_expose_part_1(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1010 m.def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, - (py::args("queue", "mem", "hostbuf", - "buffer_origin", "host_origin", "region"), - py::arg("buffer_pitches")=py::object(), - py::arg("host_pitches")=py::object(), - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("buffer_origin"), + py::arg("host_origin"), + py::arg("region"), + py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ); m.def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, - (py::args("queue", "mem", "hostbuf", - "buffer_origin", "host_origin", "region"), - py::arg("buffer_pitches")=py::object(), - py::arg("host_pitches")=py::object(), - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("buffer_origin"), + py::arg("host_origin"), + py::arg("region"), + 
py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ); m.def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, - (py::args("queue", "src", "dst", - "src_origin", "dst_origin", "region"), - py::arg("src_pitches")=py::object(), - py::arg("dst_pitches")=py::object(), - py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("src"), + py::arg("dst"), + py::arg("src_origin"), + py::arg("dst_origin"), + py::arg("region"), + py::arg("src_pitches")=py::object(), + py::arg("dst_pitches")=py::object(), + py::arg("wait_for")=py::object() + ); #endif // }}} @@ -303,9 +309,9 @@ void pyopencl_expose_part_1(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1020 m.def("_enqueue_fill_buffer", enqueue_fill_buffer, - (py::args("queue", "mem", "pattern", "offset", "size"), - py::arg("wait_for")=py::object()), - py::return_value_policy()); + py::arg("queue"), py::arg("mem"), py::arg("pattern"), + py::arg("offset"), py::arg("size"), + py::arg("wait_for")=py::object()); #endif } diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index 6afaa73d..1384d79a 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -3,6 +3,7 @@ #include +#include namespace py = pybind11; @@ -144,16 +145,14 @@ namespace } #define PYOPENCL_EXPOSE_TO_FROM_INT_PTR(CL_TYPENAME) \ - .def("from_int_ptr", from_int_ptr, \ - py::return_value_policy(), \ + .def_static("from_int_ptr", from_int_ptr, \ py::arg("int_ptr_value"), \ "(static method) Return a new Python object referencing the C-level " \ ":c:type:`" #CL_TYPENAME "` object at the location pointed to " \ "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ "will be called." \ "\n\n.. 
versionadded:: 2013.2\n") \ - .staticmethod("from_int_ptr") \ - .add_property("int_ptr", to_int_ptr, \ + .def_property_readonly("int_ptr", to_int_ptr, \ "Return an integer corresponding to the pointer value " \ "of the underlying :c:type:`" #CL_TYPENAME "`. " \ "Use :meth:`from_int_ptr` to turn back into a Python object." \ diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp index 78fa9acd..a15efeb6 100644 --- a/src/wrap_mempool.cpp +++ b/src/wrap_mempool.cpp @@ -210,8 +210,8 @@ namespace { typedef typename Wrapper::type cls; wrapper - .def_property("held_blocks", &cls::held_blocks) - .def_property("active_blocks", &cls::active_blocks) + .def_property_readonly("held_blocks", &cls::held_blocks) + .def_property_readonly("active_blocks", &cls::active_blocks) .DEF_SIMPLE_STATIC_METHOD(bin_number) .DEF_SIMPLE_STATIC_METHOD(alloc_size) .DEF_SIMPLE_METHOD(free_held) @@ -239,7 +239,7 @@ void pyopencl_expose_mempool(py::module &m) { typedef cl_deferred_allocator cls; - py::class_> wrapper( + py::class_ wrapper( m, "_tools_DeferredAllocator"); wrapper .def(py::init< @@ -252,7 +252,7 @@ void pyopencl_expose_mempool(py::module &m) { typedef cl_immediate_allocator cls; - py::class_> wrapper( + py::class_ wrapper( m, "_tools_ImmediateAllocator"); wrapper .def(py::init()) @@ -280,7 +280,7 @@ void pyopencl_expose_mempool(py::module &m) { typedef pooled_buffer cls; py::class_ >( + pyopencl::memory_object_holder>( m, "PooledBuffer"/* , py::no_init */) .def("release", &cls::free) ; -- GitLab From f37989821cb787bfeabe4cb6fdc5a7ac9f48c31e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 7 Aug 2018 23:59:33 -0500 Subject: [PATCH 08/92] Part 1 and 2 *almost* compile with pybind 11 [ci skip] --- setup.py | 2 +- src/wrap_cl_part_2.cpp | 192 +++++++++++++++++++++++++++-------------- 2 files changed, 127 insertions(+), 67 deletions(-) diff --git a/setup.py b/setup.py index 9073178c..8e85799d 100644 --- a/setup.py +++ b/setup.py @@ -225,7 +225,7 @@ def main(): [ 
"src/wrap_cl.cpp", "src/wrap_cl_part_1.cpp", - #"src/wrap_cl_part_2.cpp", + "src/wrap_cl_part_2.cpp", #"src/wrap_constants.cpp", "src/wrap_mempool.cpp", "src/bitlog.cpp", diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 3ebc5a65..4e8f0b8e 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -66,20 +66,26 @@ void pyopencl_expose_part_2(py::module &m) { typedef image cls; - py::class_/* , boost::noncopyable */>( - m, "Image"/* , py::no_init */) + py::class_(m, "Image"/* , py::no_init */) +#if 0 .def("__init__", make_constructor(create_image, py::default_call_policies(), - (py::args("context", "flags", "format"), - py::arg("shape")=py::object(), - py::arg("pitches")=py::object(), - py::arg("hostbuf")=py::object() + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("shape")=py::object(), + py::arg("pitches")=py::object(), + py::arg("hostbuf")=py::object() ))) #if PYOPENCL_CL_VERSION >= 0x1020 .def("__init__", make_constructor(create_image_from_desc, py::default_call_policies(), - (py::args("context", "flags", "format", "desc"), - py::arg("hostbuf")=py::object()))) + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("desc"), + py::arg("hostbuf")=py::object())) +#endif #endif .DEF_SIMPLE_METHOD(get_image_info) ; @@ -87,57 +93,85 @@ void pyopencl_expose_part_2(py::module &m) { typedef cl_image_format cls; - py::class_("ImageFormat") + py::class_(m, "ImageFormat") +#if 0 .def("__init__", py::make_constructor(make_image_format)) +#endif .def_readwrite("channel_order", &cls::image_channel_order) .def_readwrite("channel_data_type", &cls::image_channel_data_type) - .def_property("channel_count", &get_image_format_channel_count) - .def_property("dtype_size", &get_image_format_channel_dtype_size) - .def_property("itemsize", &get_image_format_item_size) + .def_property_readonly("channel_count", &get_image_format_channel_count) + .def_property_readonly("dtype_size", &get_image_format_channel_dtype_size) + 
.def_property_readonly("itemsize", &get_image_format_item_size) ; } DEF_SIMPLE_FUNCTION(get_supported_image_formats); m.def("_enqueue_read_image", enqueue_read_image, - (py::args("queue", "mem", "origin", "region", "hostbuf"), - py::arg("row_pitch")=0, - py::arg("slice_pitch")=0, - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("origin"), + py::arg("region"), + py::arg("hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ); m.def("_enqueue_write_image", enqueue_write_image, - (py::args("queue", "mem", "origin", "region", "hostbuf"), - py::arg("row_pitch")=0, - py::arg("slice_pitch")=0, - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true - ), - py::return_value_policy()); + py::arg("queue"), + py::arg("mem"), + py::arg("origin"), + py::arg("region"), + py::arg("hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true + ); m.def("_enqueue_copy_image", enqueue_copy_image, - (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"), - py::arg("wait_for")=py::object()), - py::return_value_policy()); + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("src_origin"), + py::arg("dest_origin"), + py::arg("region"), + py::arg("wait_for")=py::object() + ); m.def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, - (py::args("queue", "src", "dest", "origin", "region", "offset"), - py::arg("wait_for")=py::object()), - py::return_value_policy()); + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("origin"), + py::arg("region"), + py::arg("offset"), + py::arg("wait_for")=py::object() + ); m.def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, - (py::args("queue", "src", "dest", "offset", "origin", "region"), - py::arg("wait_for")=py::object()), 
- py::return_value_policy()); + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("offset"), + py::arg("origin"), + py::arg("region"), + py::arg("wait_for")=py::object() + ); #if PYOPENCL_CL_VERSION >= 0x1020 - m.def("enqueue_fill_image", enqueue_write_image, - (py::args("queue", "mem", "color", "origin", "region"), - py::arg("wait_for")=py::object()), - py::return_value_policy()); + m.def("enqueue_fill_image", enqueue_fill_image, + py::arg("queue"), + py::arg("mem"), + py::arg("color"), + py::arg("origin"), + py::arg("region"), + py::arg("wait_for")=py::object() + ); #endif // }}} +#if 0 // {{{ memory_map { typedef memory_map cls; @@ -149,21 +183,28 @@ void pyopencl_expose_part_2(py::module &m) } m.def("enqueue_map_buffer", enqueue_map_buffer, - (py::args("queue", "buf", "flags", - "offset", - "shape", "dtype"), - py::arg("order")="C", - py::arg("strides")=py::object(), - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true)); + py::arg("queue"), + py::arg("buf"), + py::arg("flags"), + py::arg("offset"), + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C", + py::arg("strides")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true)); m.def("enqueue_map_image", enqueue_map_image, - (py::args("queue", "img", "flags", - "origin", "region", - "shape", "dtype"), - py::arg("order")="C", - py::arg("strides")=py::object(), - py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true)); + py::arg("queue"), + py::arg("img"), + py::arg("flags"), + py::arg("origin"), + py::arg("region"), + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C", + py::arg("strides")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=true); // }}} @@ -195,16 +236,21 @@ void pyopencl_expose_part_2(py::module &m) .def("__init__", make_constructor( create_program_with_source, py::default_call_policies(), - py::args("context", "src"))) + py::arg("context"), + py::arg("src"))) .def("__init__", 
make_constructor( create_program_with_binary, py::default_call_policies(), - py::args("context", "devices", "binaries"))) + py::arg("context"), + py::arg("devices"), + py::arg("binaries")))) #if (PYOPENCL_CL_VERSION >= 0x1020) && \ ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) .def("create_with_built_in_kernels", create_program_with_built_in_kernels, - py::args("context", "devices", "kernel_names"), + py::arg("context"), + py::arg("devices"), + py::arg("kernel_names"), py::return_value_policy()) .staticmethod("create_with_built_in_kernels") #endif @@ -268,7 +314,8 @@ void pyopencl_expose_part_2(py::module &m) m.def("enqueue_nd_range_kernel", enqueue_nd_range_kernel, - (py::args("queue", "kernel"), + py::arg("queue"), + py::arg("kernel"), py::arg("global_work_size"), py::arg("local_work_size"), py::arg("global_work_offset")=py::object(), @@ -277,7 +324,8 @@ void pyopencl_expose_part_2(py::module &m) ), py::return_value_policy()); m.def("enqueue_task", enqueue_task, - (py::args("queue", "kernel"), + py::arg("queue"), + py::arg("kernel"), py::arg("wait_for")=py::object() ), py::return_value_policy()); @@ -300,7 +348,9 @@ void pyopencl_expose_part_2(py::module &m) "GLBuffer", py::no_init) .def("__init__", make_constructor(create_from_gl_buffer, py::default_call_policies(), - (py::args("context", "flags", "bufobj")))) + py::arg("context"), + py::arg("flags"), + py::arg("bufobj"))) .def("get_gl_object_info", get_gl_object_info) ; } @@ -311,7 +361,9 @@ void pyopencl_expose_part_2(py::module &m) "GLRenderBuffer", py::no_init) .def("__init__", make_constructor(create_from_gl_renderbuffer, py::default_call_policies(), - (py::args("context", "flags", "bufobj")))) + py::arg("context"), + py::arg("flags"), + py::arg("bufobj"))) .def("get_gl_object_info", get_gl_object_info) ; } @@ -322,32 +374,40 @@ void pyopencl_expose_part_2(py::module &m) "GLTexture", py::no_init) .def("__init__", make_constructor(create_from_gl_texture, py::default_call_policies(), - 
(py::args("context", "flags", - "texture_target", "miplevel", - "texture", "dims")))) + py::arg("context"), + py::arg("flags"), + py::arg("texture_target"), + py::arg("miplevel"), + py::arg("texture"), + py::arg("dims"))) .def("get_gl_object_info", get_gl_object_info) .DEF_SIMPLE_METHOD(get_gl_texture_info) ; } m.def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, - (py::args("queue", "mem_objects"), + py::arg("queue"), + py::arg("mem_objects"), py::arg("wait_for")=py::object() ), py::return_value_policy()); m.def("enqueue_release_gl_objects", enqueue_release_gl_objects, - (py::args("queue", "mem_objects"), + py::arg("queue"), + py::arg("mem_objects"), py::arg("wait_for")=py::object() ), py::return_value_policy()); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) m.def("get_gl_context_info_khr", get_gl_context_info_khr, - (py::args("properties", "param_name"), py::arg("platform")=py::object())); + py::arg("properties"), + py::arg("param_name"), + py::arg("platform")=py::object())); #endif #endif // }}} +#endif } -- GitLab From d288461154bf6b8b33234af780a2c28aa80f955b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 8 Aug 2018 09:21:02 -0500 Subject: [PATCH 09/92] [pybind] Use lambda for custom contxt constructor [ci skip] --- src/wrap_cl_part_1.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index a4a82a5f..cffe95d4 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -51,15 +51,22 @@ void pyopencl_expose_part_1(py::module &m) { typedef context cls; py::class_>(m, "Context") -#if 0 .def( - py::init(create_context)/*, + py::init( + [](py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_context_inner( + py_devices, + py_properties, + py_dev_type); + ) + }), py::arg("devices")=py::object(), py::arg("properties")=py::object(), py::arg("dev_type")=py::object() 
- */ ) -#endif .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) .def(py::self != py::self) -- GitLab From f832f60456f5dedebe08a1fa7b29f0afb2d5a722 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 8 Aug 2018 19:17:45 -0500 Subject: [PATCH 10/92] Most of the wrapper builds with pybind11 [ci skip] --- setup.py | 2 +- src/wrap_cl.hpp | 4 +- src/wrap_cl_part_1.cpp | 34 ++++--- src/wrap_cl_part_2.cpp | 221 +++++++++++++++++++++++------------------ src/wrap_constants.cpp | 106 ++++++++++---------- 5 files changed, 200 insertions(+), 167 deletions(-) diff --git a/setup.py b/setup.py index 8e85799d..0fbe8c1d 100644 --- a/setup.py +++ b/setup.py @@ -226,7 +226,7 @@ def main(): "src/wrap_cl.cpp", "src/wrap_cl_part_1.cpp", "src/wrap_cl_part_2.cpp", - #"src/wrap_constants.cpp", + "src/wrap_constants.cpp", "src/wrap_mempool.cpp", "src/bitlog.cpp", ], diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 7f962fc6..def89e6e 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -25,6 +25,8 @@ #else // elsewhere ------------------------------------------------------------------ +#define CL_TARGET_OPENCL_VERSION 220 + #include #include @@ -1519,7 +1521,7 @@ namespace pyopencl inline - event *create_user_event(context &ctx) + user_event *create_user_event(context &ctx) { cl_int status_code; PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent"); diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index cffe95d4..318ba1a9 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -146,10 +146,12 @@ void pyopencl_expose_part_1(py::module &m) { typedef user_event cls; py::class_(m, "UserEvent") -#if 0 - .def("__init__", make_constructor( - create_user_event, py::default_call_policies(), py::args("context"))) -#endif + .def(py::init( + [](context &ctx) + { + return create_user_event(ctx); + }), + py::arg("context")) .DEF_SIMPLE_METHOD(set_status) ; } @@ -167,10 +169,8 @@ void pyopencl_expose_part_1(py::module &m) py::arg("shape"), py::arg("dtype"), 
py::arg("order")="C") -#if 0 - .def(py::self == py::self) - .def(py::self != py::self) -#endif + .def("__eq__", [](const cls &self, const cls &other){ return self == other; }) + .def("__ne__", [](const cls &self, const cls &other){ return self != other; }) .def("__hash__", &cls::hash) .def_property_readonly("int_ptr", to_int_ptr, @@ -218,14 +218,16 @@ void pyopencl_expose_part_1(py::module &m) { typedef buffer cls; py::class_(m, "Buffer") -#if 0 - .def("__init__", make_constructor(create_buffer_py, - py::default_call_policies(), - (py::args("context", "flags"), - py::arg("size")=0, - py::arg("hostbuf")=py::object() - ))) -#endif + .def( + py::init( + [](context &ctx, cl_mem_flags flags, size_t size, py::object py_hostbuf) + { return create_buffer_py(ctx, flags, size, py_hostbuf); } + ), + py::arg("context"), + py::arg("flags"), + py::arg("size")=0, + py::arg("hostbuf")=py::object() + ) #if PYOPENCL_CL_VERSION >= 0x1010 .def("get_sub_region", &cls::get_sub_region, py::arg("origin"), diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 4e8f0b8e..e4360882 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -67,25 +67,43 @@ void pyopencl_expose_part_2(py::module &m) { typedef image cls; py::class_(m, "Image"/* , py::no_init */) -#if 0 - .def("__init__", make_constructor(create_image, - py::default_call_policies(), - py::arg("context"), - py::arg("flags"), - py::arg("format"), - py::arg("shape")=py::object(), - py::arg("pitches")=py::object(), - py::arg("hostbuf")=py::object() - ))) + .def( + py::init( + []( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + py::sequence shape, + py::sequence pitches, + py::object buffer) + { + return create_image(ctx, flags, fmt, shape, pitches, buffer); + }), + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("shape")=py::object(), + py::arg("pitches")=py::object(), + py::arg("hostbuf")=py::object() + ) #if PYOPENCL_CL_VERSION >= 0x1020 - .def("__init__", 
make_constructor(create_image_from_desc, - py::default_call_policies(), - py::arg("context"), - py::arg("flags"), - py::arg("format"), - py::arg("desc"), - py::arg("hostbuf")=py::object())) -#endif + .def( + py::init( + []( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + cl_image_desc &desc, + py::object buffer) + { + return create_image_from_desc(ctx, flags, fmt, desc, buffer); + }), + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("desc"), + py::arg("hostbuf")=py::object() + ) #endif .DEF_SIMPLE_METHOD(get_image_info) ; @@ -94,9 +112,12 @@ void pyopencl_expose_part_2(py::module &m) { typedef cl_image_format cls; py::class_(m, "ImageFormat") -#if 0 - .def("__init__", py::make_constructor(make_image_format)) -#endif + .def( + py::init( + [](cl_channel_order ord, cl_channel_type tp) + { + return make_image_format(ord, tp); + })) .def_readwrite("channel_order", &cls::image_channel_order) .def_readwrite("channel_data_type", &cls::image_channel_data_type) .def_property_readonly("channel_count", &get_image_format_channel_count) @@ -171,14 +192,14 @@ void pyopencl_expose_part_2(py::module &m) // }}} -#if 0 // {{{ memory_map { typedef memory_map cls; - py::class_("MemoryMap", py::no_init) + py::class_(m, "MemoryMap") .def("release", &cls::release, - (py::arg("queue")=0, py::arg("wait_for")=py::object()), - py::return_value_policy()) + py::arg("queue")=0, + py::arg("wait_for")=py::object() + ) ; } @@ -192,7 +213,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("order")="C", py::arg("strides")=py::object(), py::arg("wait_for")=py::object(), - py::arg("is_blocking")=true)); + py::arg("is_blocking")=true); m.def("enqueue_map_image", enqueue_map_image, py::arg("queue"), py::arg("img"), @@ -211,8 +232,8 @@ void pyopencl_expose_part_2(py::module &m) // {{{ sampler { typedef sampler cls; - py::class_("Sampler", - py::init()) + py::class_(m, "Sampler") + .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .def(py::self == 
py::self) .def(py::self != py::self) @@ -226,50 +247,56 @@ void pyopencl_expose_part_2(py::module &m) // {{{ program { typedef program cls; - py::enum_("program_kind") + py::enum_(m, "program_kind") .value("UNKNOWN", cls::KND_UNKNOWN) .value("SOURCE", cls::KND_SOURCE) .value("BINARY", cls::KND_BINARY) ; - py::class_("_Program", py::no_init) - .def("__init__", make_constructor( - create_program_with_source, - py::default_call_policies(), - py::arg("context"), - py::arg("src"))) - .def("__init__", make_constructor( - create_program_with_binary, - py::default_call_policies(), - py::arg("context"), - py::arg("devices"), - py::arg("binaries")))) + py::class_(m, "_Program") + .def( + py::init( + [](context &ctx, std::string const &src) + { + return create_program_with_source(ctx, src); + }), + py::arg("context"), + py::arg("src")) + .def( + py::init( + [](context &ctx, py::sequence devices, py::sequence binaries) + { + return create_program_with_binary(ctx, devices, binaries); + }), + py::arg("context"), + py::arg("devices"), + py::arg("binaries")) #if (PYOPENCL_CL_VERSION >= 0x1020) && \ ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) - .def("create_with_built_in_kernels", + .def_static("create_with_built_in_kernels", create_program_with_built_in_kernels, py::arg("context"), py::arg("devices"), py::arg("kernel_names"), py::return_value_policy()) - .staticmethod("create_with_built_in_kernels") #endif .DEF_SIMPLE_METHOD(kind) .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_build_info) .def("_build", &cls::build, - (py::arg("options")="", py::arg("devices")=py::object())) + py::arg("options")="", + py::arg("devices")=py::object()) #if PYOPENCL_CL_VERSION >= 0x1020 .def("compile", &cls::compile, - (py::arg("options")="", py::arg("devices")=py::object(), - py::arg("headers")=py::list())) - .def("link", &link_program, - (py::arg("context"), - py::arg("programs"), - py::arg("options")="", - py::arg("devices")=py::object()), - py::return_value_policy()) - 
.staticmethod("link") + py::arg("options")="", + py::arg("devices")=py::object(), + py::arg("headers")=py::list()) + .def_static("link", &link_program, + py::arg("context"), + py::arg("programs"), + py::arg("options")="", + py::arg("devices")=py::object() + ) #endif .def(py::self == py::self) .def(py::self != py::self) @@ -289,8 +316,8 @@ void pyopencl_expose_part_2(py::module &m) { typedef kernel cls; - py::class_("Kernel", - py::init()) + py::class_(m, "Kernel") + .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_work_group_info) .DEF_SIMPLE_METHOD(set_arg) @@ -306,9 +333,11 @@ void pyopencl_expose_part_2(py::module &m) { typedef local_memory cls; - py::class_("LocalMemory", - py::init(py::arg("size"))) - .def_property("size", &cls::size) + py::class_(m, "LocalMemory") + .def( + py::init(), + py::arg("size")) + .def_property_readonly("size", &cls::size) ; } @@ -321,14 +350,12 @@ void pyopencl_expose_part_2(py::module &m) py::arg("global_work_offset")=py::object(), py::arg("wait_for")=py::object(), py::arg("g_times_l")=false - ), - py::return_value_policy()); + ); m.def("enqueue_task", enqueue_task, py::arg("queue"), py::arg("kernel"), py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + ); // TODO: clEnqueueNativeKernel // }}} @@ -344,42 +371,52 @@ void pyopencl_expose_part_2(py::module &m) { typedef gl_buffer cls; - py::class_, boost::noncopyable>( - "GLBuffer", py::no_init) - .def("__init__", make_constructor(create_from_gl_buffer, - py::default_call_policies(), - py::arg("context"), - py::arg("flags"), - py::arg("bufobj"))) + py::class_(m, "GLBuffer") + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLuint bufobj) + { + return create_from_gl_buffer(ctx, flags, bufobj); + }), + py::arg("context"), + py::arg("flags"), + py::arg("bufobj")) .def("get_gl_object_info", get_gl_object_info) ; } { typedef gl_renderbuffer cls; - py::class_, boost::noncopyable>( - "GLRenderBuffer", py::no_init) - .def("__init__", 
make_constructor(create_from_gl_renderbuffer, - py::default_call_policies(), - py::arg("context"), - py::arg("flags"), - py::arg("bufobj"))) + py::class_(m, "GLRenderBuffer") + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLuint bufobj) + { + return create_from_gl_renderbuffer(ctx, flags, bufobj); + }), + py::arg("context"), + py::arg("flags"), + py::arg("bufobj")) .def("get_gl_object_info", get_gl_object_info) ; } { typedef gl_texture cls; - py::class_, boost::noncopyable>( - "GLTexture", py::no_init) - .def("__init__", make_constructor(create_from_gl_texture, - py::default_call_policies(), - py::arg("context"), - py::arg("flags"), - py::arg("texture_target"), - py::arg("miplevel"), - py::arg("texture"), - py::arg("dims"))) + py::class_(m, "GLTexture") + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLenum texture_target, + GLint miplevel, GLuint texture, unsigned dims) + { + return create_from_gl_texture(ctx, flags, texture_target, miplevel, texture, dims); + }), + py::arg("context"), + py::arg("flags"), + py::arg("texture_target"), + py::arg("miplevel"), + py::arg("texture"), + py::arg("dims")) .def("get_gl_object_info", get_gl_object_info) .DEF_SIMPLE_METHOD(get_gl_texture_info) ; @@ -389,28 +426,24 @@ void pyopencl_expose_part_2(py::module &m) py::arg("queue"), py::arg("mem_objects"), py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + ); m.def("enqueue_release_gl_objects", enqueue_release_gl_objects, py::arg("queue"), py::arg("mem_objects"), py::arg("wait_for")=py::object() - ), - py::return_value_policy()); + ); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) m.def("get_gl_context_info_khr", get_gl_context_info_khr, py::arg("properties"), py::arg("param_name"), - py::arg("platform")=py::object())); + py::arg("platform")=py::object() + ); #endif #endif // }}} -#endif } - - // vim: foldmethod=marker diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index 64511d01..5b195687 100644 --- 
a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -1,16 +1,13 @@ #include "wrap_cl.hpp" - - using namespace pyopencl; - - namespace { - py::handle<> +#if 0 + py::handle CLError, CLMemoryError, CLLogicError, @@ -30,8 +27,7 @@ namespace else PyErr_SetObject(CLError.get(), py::object(err).ptr()); } - - +#endif // {{{ 'fake' constant scopes @@ -88,10 +84,9 @@ namespace } - - -void pyopencl_expose_constants() +void pyopencl_expose_constants(py::module &m) { +#if 0 // {{{ exceptions #define DECLARE_EXC(NAME, BASE) \ CL##NAME = py::handle<>(PyErr_NewException("pyopencl." #NAME, BASE, NULL)); \ @@ -106,6 +101,7 @@ void pyopencl_expose_constants() py::register_exception_translator(translate_cl_error); } // }}} +#endif // {{{ constants #define ADD_ATTR(PREFIX, NAME) \ @@ -115,7 +111,7 @@ void pyopencl_expose_constants() { typedef error cls; - py::class_ ("_error", py::no_init) + py::class_ (m, "_error") .DEF_SIMPLE_METHOD(routine) .DEF_SIMPLE_METHOD(code) .DEF_SIMPLE_METHOD(what) @@ -123,7 +119,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("status_code", py::no_init); + py::class_ cls(m, "status_code"); ADD_ATTR(, SUCCESS); ADD_ATTR(, DEVICE_NOT_FOUND); @@ -209,7 +205,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("platform_info", py::no_init); + py::class_ cls(m, "platform_info"); ADD_ATTR(PLATFORM_, PROFILE); ADD_ATTR(PLATFORM_, VERSION); ADD_ATTR(PLATFORM_, NAME); @@ -220,7 +216,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("device_type", py::no_init); + py::class_ cls(m, "device_type"); ADD_ATTR(DEVICE_TYPE_, DEFAULT); ADD_ATTR(DEVICE_TYPE_, CPU); ADD_ATTR(DEVICE_TYPE_, GPU); @@ -232,7 +228,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("device_info", py::no_init); + py::class_ cls(m, "device_info"); ADD_ATTR(DEVICE_, TYPE); ADD_ATTR(DEVICE_, VENDOR_ID); ADD_ATTR(DEVICE_, MAX_COMPUTE_UNITS); @@ -384,7 +380,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("device_fp_config", py::no_init); + py::class_ 
cls(m, "device_fp_config"); ADD_ATTR(FP_, DENORM); ADD_ATTR(FP_, INF_NAN); ADD_ATTR(FP_, ROUND_TO_NEAREST); @@ -400,20 +396,20 @@ void pyopencl_expose_constants() } { - py::class_ cls("device_mem_cache_type", py::no_init); + py::class_ cls(m, "device_mem_cache_type"); ADD_ATTR( , NONE); ADD_ATTR( , READ_ONLY_CACHE); ADD_ATTR( , READ_WRITE_CACHE); } { - py::class_ cls("device_local_mem_type", py::no_init); + py::class_ cls(m, "device_local_mem_type"); ADD_ATTR( , LOCAL); ADD_ATTR( , GLOBAL); } { - py::class_ cls("device_exec_capabilities", py::no_init); + py::class_ cls(m, "device_exec_capabilities"); ADD_ATTR(EXEC_, KERNEL); ADD_ATTR(EXEC_, NATIVE_KERNEL); #ifdef CL_EXEC_IMMEDIATE_EXECUTION_INTEL @@ -422,7 +418,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("command_queue_properties", py::no_init); + py::class_ cls(m, "command_queue_properties"); ADD_ATTR(QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); ADD_ATTR(QUEUE_, PROFILING_ENABLE); #ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL @@ -431,7 +427,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("context_info", py::no_init); + py::class_ cls(m, "context_info"); ADD_ATTR(CONTEXT_, REFERENCE_COUNT); ADD_ATTR(CONTEXT_, DEVICES); ADD_ATTR(CONTEXT_, PROPERTIES); @@ -444,7 +440,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("gl_context_info", py::no_init); + py::class_ cls(m, "gl_context_info"); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) ADD_ATTR(, CURRENT_DEVICE_FOR_GL_CONTEXT_KHR); ADD_ATTR(, DEVICES_FOR_GL_CONTEXT_KHR); @@ -452,7 +448,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("context_properties", py::no_init); + py::class_ cls(m, "context_properties"); ADD_ATTR(CONTEXT_, PLATFORM); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) ADD_ATTR( ,GL_CONTEXT_KHR); @@ -471,7 +467,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("command_queue_info", py::no_init); + py::class_ cls(m, "command_queue_info"); ADD_ATTR(QUEUE_, CONTEXT); 
ADD_ATTR(QUEUE_, DEVICE); ADD_ATTR(QUEUE_, REFERENCE_COUNT); @@ -479,7 +475,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("mem_flags", py::no_init); + py::class_ cls(m, "mem_flags"); ADD_ATTR(MEM_, READ_WRITE); ADD_ATTR(MEM_, WRITE_ONLY); ADD_ATTR(MEM_, READ_ONLY); @@ -497,7 +493,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("channel_order", py::no_init); + py::class_ cls(m, "channel_order"); ADD_ATTR( , R); ADD_ATTR( , A); ADD_ATTR( , RG); @@ -515,7 +511,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("channel_type", py::no_init); + py::class_ cls(m, "channel_type"); ADD_ATTR( , SNORM_INT8); ADD_ATTR( , SNORM_INT16); ADD_ATTR( , UNORM_INT8); @@ -534,7 +530,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("mem_object_type", py::no_init); + py::class_ cls(m, "mem_object_type"); ADD_ATTR(MEM_OBJECT_, BUFFER); ADD_ATTR(MEM_OBJECT_, IMAGE2D); ADD_ATTR(MEM_OBJECT_, IMAGE3D); @@ -547,7 +543,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("mem_info", py::no_init); + py::class_ cls(m, "mem_info"); ADD_ATTR(MEM_, TYPE); ADD_ATTR(MEM_, FLAGS); ADD_ATTR(MEM_, SIZE); @@ -562,7 +558,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("image_info", py::no_init); + py::class_ cls(m, "image_info"); ADD_ATTR(IMAGE_, FORMAT); ADD_ATTR(IMAGE_, ELEMENT_SIZE); ADD_ATTR(IMAGE_, ROW_PITCH); @@ -579,7 +575,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("addressing_mode", py::no_init); + py::class_ cls(m, "addressing_mode"); ADD_ATTR(ADDRESS_, NONE); ADD_ATTR(ADDRESS_, CLAMP_TO_EDGE); ADD_ATTR(ADDRESS_, CLAMP); @@ -590,13 +586,13 @@ void pyopencl_expose_constants() } { - py::class_ cls("filter_mode", py::no_init); + py::class_ cls(m, "filter_mode"); ADD_ATTR(FILTER_, NEAREST); ADD_ATTR(FILTER_, LINEAR); } { - py::class_ cls("sampler_info", py::no_init); + py::class_ cls(m, "sampler_info"); ADD_ATTR(SAMPLER_, REFERENCE_COUNT); ADD_ATTR(SAMPLER_, CONTEXT); ADD_ATTR(SAMPLER_, NORMALIZED_COORDS); @@ -605,7 +601,7 @@ 
void pyopencl_expose_constants() } { - py::class_ cls("map_flags", py::no_init); + py::class_ cls(m, "map_flags"); ADD_ATTR(MAP_, READ); ADD_ATTR(MAP_, WRITE); #if PYOPENCL_CL_VERSION >= 0x1020 @@ -614,7 +610,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("program_info", py::no_init); + py::class_ cls(m, "program_info"); ADD_ATTR(PROGRAM_, REFERENCE_COUNT); ADD_ATTR(PROGRAM_, CONTEXT); ADD_ATTR(PROGRAM_, NUM_DEVICES); @@ -629,7 +625,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("program_build_info", py::no_init); + py::class_ cls(m, "program_build_info"); ADD_ATTR(PROGRAM_BUILD_, STATUS); ADD_ATTR(PROGRAM_BUILD_, OPTIONS); ADD_ATTR(PROGRAM_BUILD_, LOG); @@ -639,7 +635,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("program_binary_type", py::no_init); + py::class_ cls(m, "program_binary_type"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(PROGRAM_BINARY_TYPE_, NONE); ADD_ATTR(PROGRAM_BINARY_TYPE_, COMPILED_OBJECT); @@ -649,7 +645,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("kernel_info", py::no_init); + py::class_ cls(m, "kernel_info"); ADD_ATTR(KERNEL_, FUNCTION_NAME); ADD_ATTR(KERNEL_, NUM_ARGS); ADD_ATTR(KERNEL_, REFERENCE_COUNT); @@ -661,7 +657,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("kernel_arg_info", py::no_init); + py::class_ cls(m, "kernel_arg_info"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(KERNEL_ARG_, ADDRESS_QUALIFIER); ADD_ATTR(KERNEL_ARG_, ACCESS_QUALIFIER); @@ -672,7 +668,7 @@ void pyopencl_expose_constants() { py::class_ cls( - "kernel_arg_address_qualifier", py::no_init); + m, "kernel_arg_address_qualifier"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(KERNEL_ARG_ADDRESS_, GLOBAL); ADD_ATTR(KERNEL_ARG_ADDRESS_, LOCAL); @@ -683,7 +679,7 @@ void pyopencl_expose_constants() { py::class_ cls( - "kernel_arg_access_qualifier", py::no_init); + m, "kernel_arg_access_qualifier"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(KERNEL_ARG_ACCESS_, READ_ONLY); ADD_ATTR(KERNEL_ARG_ACCESS_, 
WRITE_ONLY); @@ -693,7 +689,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("kernel_work_group_info", py::no_init); + py::class_ cls(m, "kernel_work_group_info"); ADD_ATTR(KERNEL_, WORK_GROUP_SIZE); ADD_ATTR(KERNEL_, COMPILE_WORK_GROUP_SIZE); ADD_ATTR(KERNEL_, LOCAL_MEM_SIZE); @@ -707,7 +703,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("event_info", py::no_init); + py::class_ cls(m, "event_info"); ADD_ATTR(EVENT_, COMMAND_QUEUE); ADD_ATTR(EVENT_, COMMAND_TYPE); ADD_ATTR(EVENT_, REFERENCE_COUNT); @@ -718,7 +714,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("command_type", py::no_init); + py::class_ cls(m, "command_type"); ADD_ATTR(COMMAND_, NDRANGE_KERNEL); ADD_ATTR(COMMAND_, TASK); ADD_ATTR(COMMAND_, NATIVE_KERNEL); @@ -754,7 +750,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("command_execution_status", py::no_init); + py::class_ cls(m, "command_execution_status"); ADD_ATTR(, COMPLETE); ADD_ATTR(, RUNNING); ADD_ATTR(, SUBMITTED); @@ -762,7 +758,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("profiling_info", py::no_init); + py::class_ cls(m, "profiling_info"); ADD_ATTR(PROFILING_COMMAND_, QUEUED); ADD_ATTR(PROFILING_COMMAND_, SUBMIT); ADD_ATTR(PROFILING_COMMAND_, START); @@ -772,7 +768,7 @@ void pyopencl_expose_constants() /* not needed--filled in automatically by implementation. 
#if PYOPENCL_CL_VERSION >= 0x1010 { - py::class_ cls("buffer_create_type", py::no_init); + py::class_ cls(m, "buffer_create_type"); ADD_ATTR(BUFFER_CREATE_TYPE_, REGION); } #endif @@ -780,7 +776,7 @@ void pyopencl_expose_constants() { py::class_ cls( - "mem_migration_flags", py::no_init); + m, "mem_migration_flags"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(MIGRATE_MEM_OBJECT_, HOST); ADD_ATTR(MIGRATE_MEM_OBJECT_, CONTENT_UNDEFINED); @@ -789,7 +785,7 @@ void pyopencl_expose_constants() { py::class_ cls( - "device_partition_property_ext", py::no_init); + m, "device_partition_property_ext"); #if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) ADD_ATTR_SUFFIX(DEVICE_PARTITION_, EQUALLY, _EXT); ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_COUNTS, _EXT); @@ -802,7 +798,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("affinity_domain_ext", py::no_init); + py::class_ cls(m, "affinity_domain_ext"); #if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L1_CACHE, _EXT); ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L2_CACHE, _EXT); @@ -815,7 +811,7 @@ void pyopencl_expose_constants() { py::class_ cls( - "device_partition_property", py::no_init); + m, "device_partition_property"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(DEVICE_PARTITION_, EQUALLY); ADD_ATTR(DEVICE_PARTITION_, BY_COUNTS); @@ -825,7 +821,7 @@ void pyopencl_expose_constants() } { - py::class_ cls("device_affinity_domain", py::no_init); + py::class_ cls(m, "device_affinity_domain"); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, NUMA); ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L4_CACHE); @@ -838,7 +834,7 @@ void pyopencl_expose_constants() #ifdef HAVE_GL { - py::class_ cls("gl_object_type", py::no_init); + py::class_ cls(m, "gl_object_type"); ADD_ATTR(GL_OBJECT_, BUFFER); ADD_ATTR(GL_OBJECT_, TEXTURE2D); ADD_ATTR(GL_OBJECT_, TEXTURE3D); @@ -846,14 +842,14 @@ void pyopencl_expose_constants() } { - py::class_ 
cls("gl_texture_info", py::no_init); + py::class_ cls(m, "gl_texture_info"); ADD_ATTR(GL_, TEXTURE_TARGET); ADD_ATTR(GL_, MIPMAP_LEVEL); } #endif { - py::class_ cls("migrate_mem_object_flags_ext", py::no_init); + py::class_ cls(m, "migrate_mem_object_flags_ext"); #ifdef cl_ext_migrate_memobject ADD_ATTR_SUFFIX(MIGRATE_MEM_OBJECT_, HOST, _EXT); #endif -- GitLab From 0780852202a1f556970792418d748fd7b9465122 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 9 Aug 2018 13:07:10 -0500 Subject: [PATCH 11/92] [pybind11] exception translation logic builds [ci skip] --- src/wrap_constants.cpp | 60 ++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index 5b195687..495850d5 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -6,30 +6,6 @@ using namespace pyopencl; namespace { -#if 0 - py::handle - CLError, - CLMemoryError, - CLLogicError, - CLRuntimeError; - - - - - void translate_cl_error(const error &err) - { - if (err.code() == CL_MEM_OBJECT_ALLOCATION_FAILURE) - PyErr_SetObject(CLMemoryError.get(), py::object(err).ptr()); - else if (err.code() <= CL_INVALID_VALUE) - PyErr_SetObject(CLLogicError.get(), py::object(err).ptr()); - else if (err.code() > CL_INVALID_VALUE && err.code() < CL_SUCCESS) - PyErr_SetObject(CLRuntimeError.get(), py::object(err).ptr()); - else - PyErr_SetObject(CLError.get(), py::object(err).ptr()); - } -#endif - - // {{{ 'fake' constant scopes class status_code { }; class platform_info { }; @@ -86,22 +62,38 @@ namespace void pyopencl_expose_constants(py::module &m) { -#if 0 // {{{ exceptions + { #define DECLARE_EXC(NAME, BASE) \ - CL##NAME = py::handle<>(PyErr_NewException("pyopencl." 
#NAME, BASE, NULL)); \ - py::scope().attr(#NAME) = CL##NAME; + static py::exception CL##NAME(m, #NAME, BASE); - { DECLARE_EXC(Error, NULL); - DECLARE_EXC(MemoryError, CLError.get()); - DECLARE_EXC(LogicError, CLError.get()); - DECLARE_EXC(RuntimeError, CLError.get()); - - py::register_exception_translator(translate_cl_error); + DECLARE_EXC(MemoryError, CLError.ptr()); + DECLARE_EXC(LogicError, CLError.ptr()); + DECLARE_EXC(RuntimeError, CLError.ptr()); + + py::register_exception_translator( + [](std::exception_ptr p) + { + try + { + if (p) std::rethrow_exception(p); + } + catch (pyopencl::error &err) + { + py::object err_obj = py::cast(err); + if (err.code() == CL_MEM_OBJECT_ALLOCATION_FAILURE) + PyErr_SetObject(CLMemoryError.ptr(), err_obj.ptr()); + else if (err.code() <= CL_INVALID_VALUE) + PyErr_SetObject(CLLogicError.ptr(), err_obj.ptr()); + else if (err.code() > CL_INVALID_VALUE && err.code() < CL_SUCCESS) + PyErr_SetObject(CLRuntimeError.ptr(), err_obj.ptr()); + else + PyErr_SetObject(CLError.ptr(), err_obj.ptr()); + } + }); } // }}} -#endif // {{{ constants #define ADD_ATTR(PREFIX, NAME) \ -- GitLab From 401c6030e98812045c0907cae784984479a410b7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 9 Aug 2018 19:17:09 -0500 Subject: [PATCH 12/92] [pybind11] More work towards a working wrapper [ci skip] --- pyopencl/__init__.py | 830 ++++++++++- pyopencl/cache.py | 2 +- pyopencl/cffi_cl.py | 2954 ---------------------------------------- pyopencl/mempool.py | 275 ---- pyopencl/tools.py | 10 +- setup.py | 4 +- src/wrap_cl.hpp | 1 - src/wrap_cl_part_1.cpp | 78 +- src/wrap_cl_part_2.cpp | 68 +- 9 files changed, 893 insertions(+), 3329 deletions(-) delete mode 100644 pyopencl/cffi_cl.py delete mode 100644 pyopencl/mempool.py diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 53934975..f8ba8ccb 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -26,7 +26,7 @@ THE SOFTWARE. 
import re import six -from six.moves import input +from six.moves import input, intern from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -34,7 +34,7 @@ import logging logger = logging.getLogger(__name__) try: - import pyopencl.cffi_cl as _cl + import pyopencl._cl as _cl except ImportError: import os from os.path import dirname, join, realpath @@ -46,7 +46,14 @@ except ImportError: import numpy as np -from pyopencl.cffi_cl import ( # noqa +from pytools import Record + +import sys + +_PYPY = '__pypy__' in sys.builtin_module_names +_CPY2 = not _PYPY and sys.version_info < (3,) + +from pyopencl._cl import ( # noqa get_cl_header_version, program_kind, status_code, @@ -57,17 +64,20 @@ from pyopencl.cffi_cl import ( # noqa device_mem_cache_type, device_local_mem_type, device_exec_capabilities, - device_svm_capabilities, + # FIXME + # device_svm_capabilities, command_queue_properties, context_info, gl_context_info, context_properties, command_queue_info, - queue_properties, + # FIXME + # queue_properties, mem_flags, - svm_mem_flags, + # FIXME + # svm_mem_flags, channel_order, channel_type, @@ -84,9 +94,10 @@ from pyopencl.cffi_cl import ( # noqa kernel_info, kernel_arg_info, - kernel_arg_address_qualifier, - kernel_arg_access_qualifier, - kernel_arg_type_qualifier, + # FIXME + # kernel_arg_address_qualifier, + # kernel_arg_access_qualifier, + # kernel_arg_type_qualifier, kernel_work_group_info, event_info, @@ -94,7 +105,8 @@ from pyopencl.cffi_cl import ( # noqa command_execution_status, profiling_info, mem_migration_flags, - mem_migration_flags_ext, + # FIXME + # mem_migration_flags_ext, device_partition_property, device_affinity_domain, gl_object_type, @@ -115,11 +127,13 @@ from pyopencl.cffi_cl import ( # noqa MemoryObject, MemoryMap, Buffer, - SVMAllocation, - SVM, - SVMMap, + # FIXME + # SVMAllocation, + # SVM, + # SVMMap, - CompilerWarning, + # FIXME + # CompilerWarning, _Program, Kernel, @@ -155,11 +169,13 @@ from pyopencl.cffi_cl import ( 
# noqa enqueue_fill_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, - enqueue_svm_memfill, - enqueue_svm_migratemem, + # FIXME + # enqueue_svm_memfill, + # enqueue_svm_migratemem, have_gl, - _GLObject, + # FIXME? + # _GLObject, GLBuffer, GLRenderBuffer, @@ -170,14 +186,24 @@ from pyopencl.cffi_cl import ( # noqa Image, Sampler, GLTexture, - DeviceTopologyAmd, + # FIXME + # DeviceTopologyAmd, - add_get_info_attrs as _add_get_info_attrs, + # FIXME? + # add_get_info_attrs as _add_get_info_attrs, ) +import inspect as _inspect + +CONSTANT_CLASSES = [ + getattr(_cl, name) for name in dir(_cl) + if _inspect.isclass(getattr(_cl, name)) + and name[0].islower() and name not in ["zip", "map", "range"]] + + if _cl.have_gl(): try: - from pyopencl.cffi_cl import get_apple_cgl_share_group # noqa + from pyopencl._cl import get_apple_cgl_share_group # noqa except ImportError: pass @@ -190,6 +216,10 @@ if _cl.have_gl(): pass +class _ErrorRecord(Record): + pass + + # {{{ find pyopencl shipped source code def _find_pyopencl_include_path(): @@ -522,7 +552,7 @@ class Program(object): return hash(self._get_prg()) -_add_get_info_attrs(Program, Program.get_info, program_info) +# _add_get_info_attrs(Program, Program.get_info, program_info) def create_program_with_built_in_kernels(context, devices, kernel_names): @@ -540,9 +570,761 @@ def link_program(context, programs, options=[], devices=None): # }}} +# {{{ monkeypatch C++ wrappers to add functionality + +def _add_functionality(): + cls_to_info_cls = { + _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []), + _cl.Device: (_cl.Device.get_info, _cl.device_info, + ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]), + _cl.Context: (_cl.Context.get_info, _cl.context_info, []), + _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info, + ["CONTEXT", "DEVICE"]), + _cl.Event: (_cl.Event.get_info, _cl.event_info, []), + _cl.MemoryObjectHolder: + (MemoryObjectHolder.get_info, _cl.mem_info, []), + 
Image: (_cl.Image.get_image_info, _cl.image_info, []), + Program: (Program.get_info, _cl.program_info, []), + Kernel: (Kernel.get_info, _cl.kernel_info, []), + _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []), + } + + def to_string(cls, value, default_format=None): + for name in dir(cls): + if (not name.startswith("_") and getattr(cls, name) == value): + return name + + if default_format is None: + raise ValueError("a name for value %d was not found in %s" + % (value, cls.__name__)) + else: + return default_format % value + + for cls in CONSTANT_CLASSES: + cls.to_string = classmethod(to_string) + + # {{{ get_info attributes ------------------------------------------------- + + def make_getinfo(info_method, info_name, info_attr): + def result(self): + return info_method(self, info_attr) + + return property(result) + + def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): + def result(self): + try: + return getattr(self, cache_attr) + except AttributeError: + pass + + result = info_method(self, info_attr) + setattr(self, cache_attr, result) + return result + + return property(result) + + for cls, (info_method, info_class, cacheable_attrs) \ + in six.iteritems(cls_to_info_cls): + for info_name, info_value in six.iteritems(info_class.__dict__): + if info_name == "to_string" or info_name.startswith("_"): + continue + + info_lower = info_name.lower() + info_constant = getattr(info_class, info_name) + if info_name in cacheable_attrs: + cache_attr = intern("_info_cache_"+info_lower) + setattr(cls, info_lower, make_cacheable_getinfo( + info_method, info_lower, cache_attr, info_constant)) + else: + setattr(cls, info_lower, make_getinfo( + info_method, info_name, info_constant)) + + # }}} + + # {{{ Platform + + def platform_repr(self): + return "" % (self.name, self.int_ptr) + + def platform_get_cl_version(self): + import re + version_string = self.version + match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) + if match is None: + 
raise RuntimeError("platform %s returned non-conformant " + "platform version string '%s'" % + (self, version_string)) + + return int(match.group(1)), int(match.group(2)) + + Platform.__repr__ = platform_repr + Platform._get_cl_version = platform_get_cl_version + + # }}} + + # {{{ Device + + def device_repr(self): + return "" % ( + self.name.strip(), self.platform.name.strip(), self.int_ptr) + + def device_persistent_unique_id(self): + return (self.vendor, self.vendor_id, self.name, self.version) + + Device.__repr__ = device_repr + + # undocumented for now: + Device.persistent_unique_id = property(device_persistent_unique_id) + + # }}} + + # {{{ Context + + def context_repr(self): + return "" % (self.int_ptr, + ", ".join(repr(dev) for dev in self.devices)) + + def context_get_cl_version(self): + return self.devices[0].platform._get_cl_version() + + Context.__repr__ = context_repr + from pytools import memoize_method + Context._get_cl_version = memoize_method(context_get_cl_version) + + # }}} + + # {{{ CommandQueue + + def command_queue_enter(self): + return self + + def command_queue_exit(self, exc_type, exc_val, exc_tb): + self.finish() + + def command_queue_get_cl_version(self): + return self.context._get_cl_version() + + CommandQueue.__enter__ = command_queue_enter + CommandQueue.__exit__ = command_queue_exit + CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version) + + # }}} + + # {{{ _Program (the internal, non-caching version) + + def program_get_build_logs(self): + build_logs = [] + for dev in self.get_info(_cl.program_info.DEVICES): + try: + log = self.get_build_info(dev, program_build_info.LOG) + except Exception: + log = "" + + build_logs.append((dev, log)) + + return build_logs + + def program_build(self, options_bytes, devices=None): + err = None + try: + self._build(options=options_bytes, devices=devices) + except Error as e: + what = e.what + "\n\n" + (75*"="+"\n").join( + "Build on %s:\n\n%s" % (dev, log) + for dev, log in 
self._get_build_logs()) + code = e.code + routine = e.routine + + err = _cl.RuntimeError( + _ErrorRecord( + what=lambda: what, + code=lambda: code, + routine=lambda: routine)) + + if err is not None: + # Python 3.2 outputs the whole list of currently active exceptions + # This serves to remove one (redundant) level from that nesting. + raise err + + message = (75*"="+"\n").join( + "Build on %s succeeded, but said:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs() + if log is not None and log.strip()) + + if message: + if self.kind() == program_kind.SOURCE: + build_type = "From-source build" + elif self.kind() == program_kind.BINARY: + build_type = "From-binary build" + else: + build_type = "Build" + + compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" + % (build_type, message)) + + return self + + _cl._Program._get_build_logs = program_get_build_logs + _cl._Program.build = program_build + + # }}} + + # {{{ Event + class ProfilingInfoGetter: + def __init__(self, event): + self.event = event + + def __getattr__(self, name): + info_cls = _cl.profiling_info + + try: + inf_attr = getattr(info_cls, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_profiling_info(inf_attr) + + _cl.Event.profile = property(ProfilingInfoGetter) + + # }}} + + # {{{ Kernel + + kernel_old_init = Kernel.__init__ + kernel_old_get_work_group_info = Kernel.get_work_group_info + + def kernel_init(self, prg, name): + if not isinstance(prg, _cl._Program): + prg = prg._get_prg() + + kernel_old_init(self, prg, name) + + self._setup(prg) + + def kernel__setup(self, prg): + self._source = getattr(prg, "_source", None) + + self._generate_naive_call() + self._wg_info_cache = {} + return self + + def kernel_get_work_group_info(self, param, device): + try: + return self._wg_info_cache[param, device] + except KeyError: + pass + + result = kernel_old_get_work_group_info(self, param, 
device) + self._wg_info_cache[param, device] = result + return result + + # {{{ code generation for __call__, set_args + + def kernel__set_set_args_body(self, body, num_passed_args): + from pytools.py_codegen import ( + PythonFunctionGenerator, + PythonCodeGenerator, + Indentation) + + arg_names = ["arg%d" % i for i in range(num_passed_args)] + + # {{{ wrap in error handler + + err_gen = PythonCodeGenerator() + + def gen_error_handler(): + err_gen(""" + if current_arg is not None: + args = [{args}] + advice = "" + from pyopencl.array import Array + if isinstance(args[current_arg], Array): + advice = " (perhaps you meant to pass 'array.data' " \ + "instead of the array itself?)" + + raise _cl.LogicError( + "when processing argument #%d (1-based): %s%s" + % (current_arg+1, str(e), advice)) + else: + raise + """ + .format(args=", ".join(arg_names))) + err_gen("") + + err_gen("try:") + with Indentation(err_gen): + err_gen.extend(body) + err_gen("except TypeError as e:") + with Indentation(err_gen): + gen_error_handler() + err_gen("except _cl.LogicError as e:") + with Indentation(err_gen): + gen_error_handler() + + # }}} + + def add_preamble(gen): + gen.add_to_preamble( + "import numpy as np") + gen.add_to_preamble( + "import pyopencl._cl as _cl") + gen.add_to_preamble("from pyopencl import status_code") + gen.add_to_preamble("from struct import pack") + gen.add_to_preamble("") + + # {{{ generate _enqueue + + gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name, + ["self", "queue", "global_size", "local_size"] + + arg_names + + ["global_offset=None", "g_times_l=None", "wait_for=None"]) + + add_preamble(gen) + gen.extend(err_gen) + + gen(""" + return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size, + global_offset, wait_for, g_times_l=g_times_l) + """) + + self._enqueue = gen.get_function() + + # }}} + + # {{{ generate set_args + + gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names) + + add_preamble(gen) + gen.extend(err_gen) 
+ + self._set_args = gen.get_function() + + # }}} + + def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var): + # (TODO: still needed?) + + # from pytools.py_codegen import Indentation + # + # if _CPY2: + # # https://github.com/numpy/numpy/issues/5381 + # gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) + # with Indentation(gen): + # gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) + + gen(""" + kernel._set_arg_bytes({arg_idx}, {buf_var}) + """ + .format(arg_idx=arg_idx, buf_var=buf_var)) + + def kernel__generate_bytes_arg_setter(self, gen, arg_idx, buf_var): + gen(""" + self._set_arg_bytes({arg_idx}, {buf_var}) + """ + .format(arg_idx=arg_idx, buf_var=buf_var)) + + def kernel__generate_generic_arg_handler(self, gen, arg_idx, arg_var): + from pytools.py_codegen import Indentation + + gen(""" + if {arg_var} is None: + self._set_arg_null({arg_idx}) + elif isinstance({arg_var}, _CLKernelArg): + self.set_arg({arg_idx}, {arg_var}) + """ + .format(arg_idx=arg_idx, arg_var=arg_var)) + + gen("else:") + with Indentation(gen): + self._generate_buffer_arg_setter(gen, arg_idx, arg_var) + + def kernel__generate_naive_call(self): + num_args = self.num_args + + from pytools.py_codegen import PythonCodeGenerator + gen = PythonCodeGenerator() + + if num_args == 0: + gen("pass") + + for i in range(num_args): + gen("# process argument {arg_idx}".format(arg_idx=i)) + gen("") + gen("current_arg = {arg_idx}".format(arg_idx=i)) + self._generate_generic_arg_handler(gen, i, "arg%d" % i) + gen("") + + self._set_set_args_body(gen, num_args) + + def kernel_set_scalar_arg_dtypes(self, scalar_arg_dtypes): + self._scalar_arg_dtypes = scalar_arg_dtypes + + # {{{ arg counting bug handling + + # For example: + # https://github.com/pocl/pocl/issues/197 + # (but Apple CPU has a similar bug) + + work_around_arg_count_bug = False + warn_about_arg_count_bug = False + + from pyopencl.characterize import has_struct_arg_count_bug + + count_bug_per_dev = [ 
+ has_struct_arg_count_bug(dev) + for dev in self.context.devices] + + from pytools import single_valued + if any(count_bug_per_dev): + if all(count_bug_per_dev): + work_around_arg_count_bug = single_valued(count_bug_per_dev) + else: + warn_about_arg_count_bug = True + + fp_arg_count = 0 + + # }}} + + cl_arg_idx = 0 + + from pytools.py_codegen import PythonCodeGenerator + gen = PythonCodeGenerator() + + if not scalar_arg_dtypes: + gen("pass") + + for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes): + gen("# process argument {arg_idx}".format(arg_idx=arg_idx)) + gen("") + gen("current_arg = {arg_idx}".format(arg_idx=arg_idx)) + arg_var = "arg%d" % arg_idx + + if arg_dtype is None: + self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) + cl_arg_idx += 1 + gen("") + continue + + arg_dtype = np.dtype(arg_dtype) + + if arg_dtype.char == "V": + self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) + cl_arg_idx += 1 + + elif arg_dtype.kind == "c": + if warn_about_arg_count_bug: + warn("{knl_name}: arguments include complex numbers, and " + "some (but not all) of the target devices mishandle " + "struct kernel arguments (hence the workaround is " + "disabled".format( + knl_name=self.function_name, stacklevel=2)) + + if arg_dtype == np.complex64: + arg_char = "f" + elif arg_dtype == np.complex128: + arg_char = "d" + else: + raise TypeError("unexpected complex type: %s" % arg_dtype) + + if (work_around_arg_count_bug == "pocl" + and arg_dtype == np.complex128 + and fp_arg_count + 2 <= 8): + gen( + "buf = pack('{arg_char}', {arg_var}.real)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + gen("current_arg = current_arg + 1000") + gen( + "buf = pack('{arg_char}', {arg_var}.imag)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + elif (work_around_arg_count_bug == "apple" + and arg_dtype == np.complex128 + and 
fp_arg_count + 2 <= 8): + raise NotImplementedError("No work-around to " + "Apple's broken structs-as-kernel arg " + "handling has been found. " + "Cannot pass complex numbers to kernels.") + + else: + gen( + "buf = pack('{arg_char}{arg_char}', " + "{arg_var}.real, {arg_var}.imag)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + fp_arg_count += 2 + + elif arg_dtype.char in "IL" and _CPY26: + # Prevent SystemError: ../Objects/longobject.c:336: bad + # argument to internal function + + gen( + "buf = pack('{arg_char}', long({arg_var}))" + .format(arg_char=arg_dtype.char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + else: + if arg_dtype.kind == "f": + fp_arg_count += 1 + + arg_char = arg_dtype.char + arg_char = _type_char_map.get(arg_char, arg_char) + gen( + "buf = pack('{arg_char}', {arg_var})" + .format( + arg_char=arg_char, + arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + gen("") + + if cl_arg_idx != self.num_args: + raise TypeError( + "length of argument list (%d) and " + "CL-generated number of arguments (%d) do not agree" + % (cl_arg_idx, self.num_args)) + + self._set_set_args_body(gen, len(scalar_arg_dtypes)) + + # }}} + + def kernel_set_args(self, *args, **kwargs): + # Need to duplicate the 'self' argument for dynamically generated method + return self._set_args(self, *args, **kwargs) + + def kernel_call(self, queue, global_size, local_size, *args, **kwargs): + # __call__ can't be overridden directly, so we need this + # trampoline hack. 
+ return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) + + def kernel_capture_call(self, filename, queue, global_size, local_size, + *args, **kwargs): + from pyopencl.capture_call import capture_kernel_call + capture_kernel_call(self, filename, queue, global_size, local_size, + *args, **kwargs) + + Kernel.__init__ = kernel_init + Kernel._setup = kernel__setup + Kernel.get_work_group_info = kernel_get_work_group_info + Kernel._set_set_args_body = kernel__set_set_args_body + Kernel._generate_bufprot_arg_setter = kernel__generate_bufprot_arg_setter + Kernel._generate_bytes_arg_setter = kernel__generate_bytes_arg_setter + Kernel._generate_generic_arg_handler = kernel__generate_generic_arg_handler + Kernel._generate_naive_call = kernel__generate_naive_call + Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes + Kernel.set_args = kernel_set_args + Kernel.__call__ = kernel_call + Kernel.capture_call = kernel_capture_call + + # }}} + + # {{{ ImageFormat + + def image_format_repr(self): + return "ImageFormat(%s, %s)" % ( + channel_order.to_string(self.channel_order, + ""), + channel_type.to_string(self.channel_data_type, + "")) + + def image_format_eq(self, other): + return (self.channel_order == other.channel_order + and self.channel_data_type == other.channel_data_type) + + def image_format_ne(self, other): + return not image_format_eq(self, other) + + def image_format_hash(self): + return hash((type(self), self.channel_order, self.channel_data_type)) + + ImageFormat.__repr__ = image_format_repr + ImageFormat.__eq__ = image_format_eq + ImageFormat.__ne__ = image_format_ne + ImageFormat.__hash__ = image_format_hash + + # }}} + + # {{{ Image + + image_old_init = Image.__init__ + + def image_init(self, context, flags, format, shape=None, pitches=None, + hostbuf=None, is_array=False, buffer=None): + + if shape is None and hostbuf is None: + raise Error("'shape' must be passed if 'hostbuf' is not given") + + if shape is None and hostbuf is not 
None: + shape = hostbuf.shape + + if hostbuf is not None and not \ + (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): + from warnings import warn + warn("'hostbuf' was passed, but no memory flags to make use of it.") + + if hostbuf is None and pitches is not None: + raise Error("'pitches' may only be given if 'hostbuf' is given") + + if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): + if buffer is not None and is_array: + raise ValueError( + "'buffer' and 'is_array' are mutually exclusive") + + if len(shape) == 3: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 3D arrays") + elif is_array: + image_type = mem_object_type.IMAGE2D_ARRAY + else: + image_type = mem_object_type.IMAGE3D + + elif len(shape) == 2: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 2D arrays") + elif is_array: + image_type = mem_object_type.IMAGE1D_ARRAY + else: + image_type = mem_object_type.IMAGE2D + + elif len(shape) == 1: + if buffer is not None: + image_type = mem_object_type.IMAGE1D_BUFFER + elif is_array: + raise TypeError("array of zero-dimensional images not supported") + else: + image_type = mem_object_type.IMAGE1D + + else: + raise ValueError("images cannot have more than three dimensions") + + desc = ImageDescriptor() + + desc.image_type = image_type + desc.shape = shape # also sets desc.array_size + + if pitches is None: + desc.pitches = (0, 0) + else: + desc.pitches = pitches + + desc.num_mip_levels = 0 # per CL 1.2 spec + desc.num_samples = 0 # per CL 1.2 spec + desc.buffer = buffer + + image_old_init(self, context, flags, format, desc, hostbuf) + else: + # legacy init for CL 1.1 and older + if is_array: + raise TypeError("'is_array=True' is not supported for CL < 1.2") + # if num_mip_levels is not None: + # raise TypeError( + # "'num_mip_levels' argument is not supported for CL < 1.2") + # if num_samples is not None: + # raise TypeError( + # "'num_samples' 
argument is not supported for CL < 1.2") + if buffer is not None: + raise TypeError("'buffer' argument is not supported for CL < 1.2") + + image_old_init(self, context, flags, format, shape, + pitches, hostbuf) + + class _ImageInfoGetter: + def __init__(self, event): + from warnings import warn + warn("Image.image.attr is deprecated. " + "Use Image.attr directly, instead.") + + self.event = event + + def __getattr__(self, name): + try: + inf_attr = getattr(_cl.image_info, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_image_info(inf_attr) + + def image_shape(self): + if self.type == mem_object_type.IMAGE2D: + return (self.width, self.height) + elif self.type == mem_object_type.IMAGE3D: + return (self.width, self.height, self.depth) + else: + raise LogicError("only images have shapes") + + Image.__init__ = image_init + Image.image = property(_ImageInfoGetter) + Image.shape = property(image_shape) + + # }}} + + # {{{ Error + + def error_str(self): + val = self.args[0] + try: + val.routine + except AttributeError: + return str(val) + else: + result = "" + if val.code() != status_code.SUCCESS: + result = status_code.to_string( + val.code(), "") + routine = val.routine() + if routine: + result = "%s failed: %s" % (routine, result) + what = val.what() + if what: + if result: + result += " - " + result += what + return result + + def error_code(self): + return self.args[0].code() + + def error_routine(self): + return self.args[0].routine() + + def error_what(self): + return self.args[0].what() + + Error.__str__ = error_str + Error.code = property(error_code) + Error.routine = property(error_routine) + Error.what = property(error_what) + + # }}} + + if _cl.have_gl(): + def gl_object_get_gl_object(self): + return self.get_gl_object_info()[1] + + GLBuffer.gl_object = property(gl_object_get_gl_object) + GLTexture.gl_object = property(gl_object_get_gl_object) + 
+_add_functionality() + +# }}} + + # {{{ create_some_context -def create_some_context(interactive=None, answers=None, cache_dir=None): +def create_some_context(interactive=None, answers=None): import os if answers is None: if "PYOPENCL_CTX" in os.environ: @@ -553,7 +1335,7 @@ def create_some_context(interactive=None, answers=None, cache_dir=None): from pyopencl.tools import get_test_platforms_and_devices for plat, devs in get_test_platforms_and_devices(): for dev in devs: - return Context([dev], cache_dir=cache_dir) + return Context([dev]) if answers is not None: pre_provided_answers = answers @@ -668,7 +1450,7 @@ def create_some_context(interactive=None, answers=None, cache_dir=None): raise RuntimeError("not all provided choices were used by " "create_some_context. (left over: '%s')" % ":".join(answers)) - return Context(devices, cache_dir=cache_dir) + return Context(devices) _csc = create_some_context diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 22e55c40..670134e8 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -26,7 +26,7 @@ THE SOFTWARE. 
import six from six.moves import zip -import pyopencl.cffi_cl as _cl +import pyopencl._cl as _cl import re import sys import os diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py deleted file mode 100644 index c5effc7b..00000000 --- a/pyopencl/cffi_cl.py +++ /dev/null @@ -1,2954 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = """ -Copyright (C) 2013 Marko Bencun -Copyright (C) 2014 Andreas Kloeckner -Copyright (C) 2014 Yichao Yu -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - -import six -from six.moves import map, range, zip, intern - -import warnings -import numpy as np -import sys -import re - -from pytools import memoize_method - -from pyopencl._cffi import ffi as _ffi -from .compyte.array import f_contiguous_strides, c_contiguous_strides - - -from pyopencl._cffi import lib as _lib - -import logging -logger = logging.getLogger(__name__) - - -class _CLKernelArg(object): - pass - - -# {{{ hook up connections between the wrapper and the interperter - -import gc -_py_gc = _ffi.callback('int(void)')(gc.collect) - -_pyrefs = {} - - -@_ffi.callback('void(void*)') -def _py_deref(handle): - try: - del _pyrefs[handle] - except Exception: - pass - - -# return a new reference of the object pointed to by the handle. -# The return value might be different with the input (on PyPy). -# _py_deref should be called (once) when the object is not needed anymore. -@_ffi.callback('void*(void*)') -def _py_ref(handle): - obj = _ffi.from_handle(handle) - handle = _ffi.new_handle(obj) - _pyrefs[handle] = handle - return handle - - -@_ffi.callback('void(void*, cl_int)') -def _py_call(handle, status): - _ffi.from_handle(handle)(status) - - -_lib.set_py_funcs(_py_gc, _py_ref, _py_deref, _py_call) - -# }}} - - -# {{{ compatibility shims - -# are we running on pypy? 
-_PYPY = '__pypy__' in sys.builtin_module_names -_CPY2 = not _PYPY and sys.version_info < (3,) - -try: - _unicode = eval('unicode') - _ffi_pystr = _ffi.string -except Exception: - _unicode = str - _bytes = bytes - - def _ffi_pystr(s): - return _ffi.string(s).decode() if s else None -else: - try: - _bytes = bytes - except Exception: - _bytes = str - - -def _to_cstring(s): - if isinstance(s, _unicode): - return s.encode() - return s - -# }}} - - -# {{{ wrapper tools - -# {{{ _CArray helper classes - -class _CArray(object): - def __init__(self, ptr): - self.ptr = ptr - self.size = _ffi.new('uint32_t*') - - def __del__(self): - if self.ptr != _ffi.NULL: - _lib.free_pointer(self.ptr[0]) - - def __getitem__(self, key): - return self.ptr[0].__getitem__(key) - - def __iter__(self): - for i in range(self.size[0]): - yield self[i] - -# }}} - - -# {{{ GetInfo support - -def _generic_info_to_python(info): - type_ = _ffi_pystr(info.type) - - if info.free_type: - _lib.free_pointer(info.type) - - value = _ffi.cast(type_, info.value) - - if info.opaque_class != _lib.CLASS_NONE: - klass = { - _lib.CLASS_PLATFORM: Platform, - _lib.CLASS_DEVICE: Device, - _lib.CLASS_KERNEL: Kernel, - _lib.CLASS_CONTEXT: Context, - _lib.CLASS_BUFFER: Buffer, - _lib.CLASS_PROGRAM: _Program, - _lib.CLASS_EVENT: Event, - _lib.CLASS_COMMAND_QUEUE: CommandQueue - }[info.opaque_class] - - if klass is _Program: - def create_inst(val): - from pyopencl import Program - return Program(_Program._create(val)) - - else: - create_inst = klass._create - - if type_.endswith(']'): - ret = list(map(create_inst, value)) - _lib.free_pointer(info.value) - return ret - else: - return create_inst(value) - - if type_ == 'char*': - ret = _ffi_pystr(value) - elif type_ == 'cl_device_topology_amd*': - ret = DeviceTopologyAmd( - value.pcie.bus, value.pcie.device, value.pcie.function) - elif type_ == 'cl_image_format*': - ret = ImageFormat(value.image_channel_order, - value.image_channel_data_type) - elif 
type_.startswith('char*['): - ret = list(map(_ffi_pystr, value)) - _lib.free_pointer_array(info.value, len(value)) - elif type_.endswith(']'): - if type_.startswith('char['): - # This is usually a CL binary, which may contain NUL characters - # that should be preserved. - ret = _bytes(_ffi.buffer(value)) - - elif type_.startswith('generic_info['): - ret = list(map(_generic_info_to_python, value)) - elif type_.startswith('cl_image_format['): - ret = [ImageFormat(imf.image_channel_order, - imf.image_channel_data_type) - for imf in value] - else: - ret = list(value) - else: - ret = value[0] - if info.free_value: - _lib.free_pointer(info.value) - return ret - -# }}} - - -def _clobj_list(objs): - if objs is None: - return _ffi.NULL, 0 - return [ev.ptr for ev in objs], len(objs) - - -# {{{ common base class - -class _Common(object): - @classmethod - def _create(cls, ptr): - self = cls.__new__(cls) - self.ptr = ptr - return self - ptr = _ffi.NULL - - # {{{ cleanup - - # The module-global _lib variable may get set to None during interpreter - # cleanup before we're done cleaning up CL objects. (Symbols starting with - # an underscore even get cleared first [1]--although it's unclear that that - # really matters.) To retain our ability to clean up objects, retain a - # reference to the _lib module. 
- # - # [1] https://www.python.org/doc/essays/cleanup/ - - _retained_lib = _lib - - def __del__(self): - self._retained_lib.clobj__delete(self.ptr) - - # }}} - - def __eq__(self, other): - return other.int_ptr == self.int_ptr - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return _lib.clobj__int_ptr(self.ptr) - - def get_info(self, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.clobj__get_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - @property - def int_ptr(self): - return _lib.clobj__int_ptr(self.ptr) - - @classmethod - def from_int_ptr(cls, int_ptr_value, retain=True): - """Constructs a :mod:`pyopencl` handle from a C-level pointer (given as - the integer *int_ptr_value*). If *retain* is *True* (the default) - :mod:`pyopencl` will call ``clRetainXXX`` on the provided object. If - the previous owner of the object will *not* release the reference, - *retain* should be set to *False*, to effectively transfer ownership to - :mod:`pyopencl`. - - .. versionchanged:: 2016.1 - - *retain* added - """ - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.clobj__from_int_ptr( - ptr, int_ptr_value, getattr(_lib, 'CLASS_%s' % cls._id.upper()), - retain)) - return cls._create(ptr[0]) - -# }}} - -# }}} - - -def get_cl_header_version(): - v = _lib.get_cl_version() - return (v >> (3 * 4), - (v >> (1 * 4)) & 0xff) - - -# {{{ constants - -_constants = {} - - -# {{{ constant classes - -class _ConstantsNamespace(object): - def __init__(self): - raise RuntimeError("This class cannot be instantiated.") - - @classmethod - def to_string(cls, value, default_format=None): - for name in dir(cls): - if (not name.startswith("_") and getattr(cls, name) == value): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - -# /!\ If you add anything here, add it to pyopencl/__init__.py as well. 
- -class program_kind(_ConstantsNamespace): # noqa - pass - - -class status_code(_ConstantsNamespace): # noqa - pass - - -class platform_info(_ConstantsNamespace): # noqa - pass - - -class device_type(_ConstantsNamespace): # noqa - @classmethod - def to_string(cls, value, default_format=None): - for name in dir(cls): - if name in ("DEFAULT", "ALL"): - continue - if not name.startswith("_"): - bitfield = getattr(cls, name) - if (isinstance(bitfield, six.integer_types) - and ((bitfield & value) == bitfield)): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - -class device_info(_ConstantsNamespace): # noqa - pass - - -class device_fp_config(_ConstantsNamespace): # noqa - pass - - -class device_mem_cache_type(_ConstantsNamespace): # noqa - pass - - -class device_local_mem_type(_ConstantsNamespace): # noqa - pass - - -class device_exec_capabilities(_ConstantsNamespace): # noqa - pass - - -class device_svm_capabilities(_ConstantsNamespace): # noqa - pass - - -class command_queue_properties(_ConstantsNamespace): # noqa - pass - - -class context_info(_ConstantsNamespace): # noqa - pass - - -class gl_context_info(_ConstantsNamespace): # noqa - pass - - -class context_properties(_ConstantsNamespace): # noqa - pass - - -class command_queue_info(_ConstantsNamespace): # noqa - pass - - -class queue_properties(_ConstantsNamespace): # noqa - pass - - -class mem_flags(_ConstantsNamespace): # noqa - @classmethod - def _writable(cls, flags): - return flags & (cls.READ_WRITE | cls.WRITE_ONLY) - - @classmethod - def _hold_host(cls, flags): - return flags & cls.USE_HOST_PTR - - @classmethod - def _use_host(cls, flags): - return flags & (cls.USE_HOST_PTR | cls.COPY_HOST_PTR) - - @classmethod - def _host_writable(cls, flags): - return cls._writable(flags) and cls._hold_host(flags) - - -class svm_mem_flags(_ConstantsNamespace): # noqa - pass - - -class 
channel_order(_ConstantsNamespace): # noqa - pass - - -class channel_type(_ConstantsNamespace): # noqa - pass - - -class mem_object_type(_ConstantsNamespace): # noqa - pass - - -class mem_info(_ConstantsNamespace): # noqa - pass - - -class image_info(_ConstantsNamespace): # noqa - pass - - -class addressing_mode(_ConstantsNamespace): # noqa - pass - - -class filter_mode(_ConstantsNamespace): # noqa - pass - - -class sampler_info(_ConstantsNamespace): # noqa - pass - - -class map_flags(_ConstantsNamespace): # noqa - pass - - -class program_info(_ConstantsNamespace): # noqa - pass - - -class program_build_info(_ConstantsNamespace): # noqa - pass - - -class program_binary_type(_ConstantsNamespace): # noqa - pass - - -class kernel_info(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_info(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_address_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_access_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_type_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_work_group_info(_ConstantsNamespace): # noqa - pass - - -class event_info(_ConstantsNamespace): # noqa - pass - - -class command_type(_ConstantsNamespace): # noqa - pass - - -class command_execution_status(_ConstantsNamespace): # noqa - pass - - -class profiling_info(_ConstantsNamespace): # noqa - pass - - -class mem_migration_flags(_ConstantsNamespace): # noqa - pass - - -class mem_migration_flags_ext(_ConstantsNamespace): # noqa - pass - - -class device_partition_property(_ConstantsNamespace): # noqa - pass - - -class device_affinity_domain(_ConstantsNamespace): # noqa - pass - - -class gl_object_type(_ConstantsNamespace): # noqa - pass - - -class gl_texture_info(_ConstantsNamespace): # noqa - pass - - -class migrate_mem_object_flags_ext(_ConstantsNamespace): # noqa - pass - -# }}} - - -_locals = locals() - - -# TODO: constant values are cl_ulong -@_ffi.callback('void (*)(const char*, const char* 
name, int64_t value)') -def _constant_callback(type_, name, value): - setattr(_locals[_ffi_pystr(type_)], _ffi_pystr(name), value) # noqa - - -_lib.populate_constants(_constant_callback) - -del _locals -del _constant_callback - -# }}} - - -# {{{ exceptions - -class Error(Exception): - class _ErrorRecord(object): - __slots__ = ('_routine', '_code', '_what') - - def __init__(self, msg='', code=0, routine=''): - self._routine = routine - assert isinstance(code, six.integer_types) - self._code = code - self._what = msg - - def routine(self): - return self._routine - - def code(self): - return self._code - - def what(self): - return self._what - - def __init__(self, *a, **kw): - if len(a) == 1 and not kw and hasattr(a[0], 'what'): - super(Error, self).__init__(a[0]) - else: - super(Error, self).__init__(self._ErrorRecord(*a, **kw)) - - def __str__(self): - val = self.args[0] - try: - val.routine - except AttributeError: - return str(val) - else: - result = "" - if val.code() != status_code.SUCCESS: - result = status_code.to_string( - val.code(), "") - routine = val.routine() - if routine: - result = "%s failed: %s" % (routine, result) - what = val.what() - if what: - if result: - result += " - " - result += what - return result - - @property - def code(self): - return self.args[0].code() - - @property - def routine(self): - return self.args[0].routine() - - @property - def what(self): - return self.args[0].what() - - def is_out_of_memory(self): - # matches C implementation in src/c_wrapper/error.h - val = self.args[0] - - return (val.code == status_code.MEM_OBJECT_ALLOCATION_FAILURE - or val.code == status_code.OUT_OF_RESOURCES - or val.code == status_code.OUT_OF_HOST_MEMORY) - - -class MemoryError(Error): - pass - - -class LogicError(Error): - pass - - -_py_RuntimeError = RuntimeError - - -class RuntimeError(Error): - pass - - -def _handle_error(error): - if error == _ffi.NULL: - return - if error.other == 1: - # non-pyopencl exceptions are handled here - e = 
_py_RuntimeError(_ffi_pystr(error.msg)) - _lib.free_pointer(error.msg) - _lib.free_pointer(error) - raise e - if error.code == status_code.MEM_OBJECT_ALLOCATION_FAILURE: - klass = MemoryError - elif error.code <= status_code.INVALID_VALUE: - klass = LogicError - elif status_code.INVALID_VALUE < error.code < status_code.SUCCESS: - klass = RuntimeError - else: - klass = Error - - e = klass(routine=_ffi_pystr(error.routine), - code=error.code, msg=_ffi_pystr(error.msg)) - _lib.free_pointer(error.routine) - _lib.free_pointer(error.msg) - _lib.free_pointer(error) - raise e - -# }}} - - -# {{{ Platform - -class Platform(_Common): - _id = 'platform' - - def get_devices(self, device_type=device_type.ALL): - devices = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.platform__get_devices( - self.ptr, devices.ptr, devices.size, device_type)) - return [Device._create(devices.ptr[0][i]) - for i in range(devices.size[0])] - - def __repr__(self): - return "" % (self.name, self.int_ptr) - - -def unload_platform_compiler(plat): - _handle_error(_lib.platform__unload_compiler(plat.ptr)) - - -def get_platforms(): - platforms = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.get_platforms(platforms.ptr, platforms.size)) - return [Platform._create(platforms.ptr[0][i]) - for i in range(platforms.size[0])] - -# }}} - - -# {{{ Device - -class Device(_Common): - _id = 'device' - - def create_sub_devices(self, props): - props = tuple(props) + (0,) - devices = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.device__create_sub_devices( - self.ptr, devices.ptr, devices.size, props)) - return [Device._create(devices.ptr[0][i]) - for i in range(devices.size[0])] - - def __repr__(self): - return "" % ( - self.name.strip(), self.platform.name.strip(), self.int_ptr) - - @property - def persistent_unique_id(self): - return (self.vendor, self.vendor_id, self.name, self.version) - -# }}} - - -# {{{ {Device,Platform}._get_cl_version - -_OPENCL_VERSION_STRING_RE = re.compile(r"^OpenCL 
([0-9]+)\.([0-9]+) .*$") - - -def _platdev_get_cl_version(self): - version_string = self.version - match = _OPENCL_VERSION_STRING_RE.match(version_string) - if match is None: - raise RuntimeError("platform %s returned non-conformant " - "platform version string '%s'" % - (self, version_string)) - - return int(match.group(1)), int(match.group(2)) - - -Platform._get_cl_version = _platdev_get_cl_version -Device._get_cl_version = _platdev_get_cl_version - -# }}} - - -# {{{ Context - -def _parse_context_properties(properties): - if properties is None: - return _ffi.NULL - - props = [] - for prop_tuple in properties: - if len(prop_tuple) != 2: - raise RuntimeError("property tuple must have length 2", - status_code.INVALID_VALUE, "Context") - - prop, value = prop_tuple - if prop is None: - raise RuntimeError("invalid context property", - status_code.INVALID_VALUE, "Context") - - props.append(prop) - if prop == context_properties.PLATFORM: - props.append(value.int_ptr) - - elif prop == getattr(context_properties, "WGL_HDC_KHR", None): - props.append(ctypes.c_ssize_t(value).value) - - elif prop in [getattr(context_properties, key, None) for key in ( - 'CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE', - 'GL_CONTEXT_KHR', - 'EGL_DISPLAY_KHR', - 'GLX_DISPLAY_KHR', - 'CGL_SHAREGROUP_KHR', - )]: - - from ctypes import _Pointer, cast - if isinstance(value, _Pointer): - val = cast(value, ctypes.c_void_p).value - else: - val = int(value) - - if not val: - raise LogicError("You most likely have not initialized " - "OpenGL properly.", - status_code.INVALID_VALUE, "Context") - props.append(val) - else: - raise RuntimeError("invalid context property", - status_code.INVALID_VALUE, "Context") - props.append(0) - return props - - -class Context(_Common): - _id = 'context' - - def __init__(self, devices=None, properties=None, dev_type=None, cache_dir=None): - c_props = _parse_context_properties(properties) - status_code = _ffi.new('cl_int*') - - _ctx = _ffi.new('clobj_t*') - if devices is not 
None: - # from device list - if dev_type is not None: - raise RuntimeError("one of 'devices' or 'dev_type' " - "must be None", - status_code.INVALID_VALUE, "Context") - _devices, num_devices = _clobj_list(devices) - # TODO parameter order? (for clobj_list) - _handle_error(_lib.create_context(_ctx, c_props, - num_devices, _devices)) - - else: - # from device type - if dev_type is None: - dev_type = device_type.DEFAULT - _handle_error(_lib.create_context_from_type(_ctx, c_props, - dev_type)) - - self.ptr = _ctx[0] - self.cache_dir = cache_dir - - def __repr__(self): - return "" % (self.int_ptr, - ", ".join(repr(dev) for dev in self.devices)) - - @memoize_method - def _get_cl_version(self): - return self.devices[0].platform._get_cl_version() - -# }}} - - -# {{{ CommandQueue - -class CommandQueue(_Common): - _id = 'command_queue' - - def __init__(self, context, device=None, properties=None): - if properties is None: - properties = 0 - - ptr_command_queue = _ffi.new('clobj_t*') - - _handle_error(_lib.create_command_queue( - ptr_command_queue, context.ptr, - _ffi.NULL if device is None else device.ptr, properties)) - - self.ptr = ptr_command_queue[0] - - def finish(self): - _handle_error(_lib.command_queue__finish(self.ptr)) - - def flush(self): - _handle_error(_lib.command_queue__flush(self.ptr)) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.finish() - - def _get_cl_version(self): - return self.device._get_cl_version() - - -# }}} - - -# {{{ _norm_shape_dtype and cffi_array - -def _norm_shape_dtype(shape, dtype, order="C", strides=None, name=""): - dtype = np.dtype(dtype) - if not isinstance(shape, tuple): - try: - shape = tuple(shape) - except Exception: - shape = (shape,) - if strides is None: - if order in "cC": - strides = c_contiguous_strides(dtype.itemsize, shape) - elif order in "fF": - strides = f_contiguous_strides(dtype.itemsize, shape) - else: - raise RuntimeError("unrecognized order specifier %s" % order, - 
status_code.INVALID_VALUE, name) - return dtype, shape, strides - - -class cffi_array(np.ndarray): # noqa - __array_priority__ = -100.0 - - def __new__(cls, buf, shape, dtype, strides, base=None): - self = np.ndarray.__new__(cls, shape, dtype=dtype, - buffer=buf, strides=strides) - if base is None: - base = buf - self.__base = base - return self - - @property - def base(self): - return self.__base - -# }}} - - -# {{{ MemoryObjectHolder base class - -class MemoryObjectHolder(_Common, _CLKernelArg): - def get_host_array(self, shape, dtype, order="C"): - dtype, shape, strides = _norm_shape_dtype( - shape, dtype, order, None, 'MemoryObjectHolder.get_host_array') - _hostptr = _ffi.new('void**') - _size = _ffi.new('size_t*') - _handle_error(_lib.memory_object__get_host_array(self.ptr, _hostptr, - _size)) - ary = cffi_array(_ffi.buffer(_hostptr[0], _size[0]), shape, - dtype, strides, self) - if ary.nbytes > _size[0]: - raise LogicError("Resulting array is larger than memory object.", - status_code.INVALID_VALUE, - "MemoryObjectHolder.get_host_array") - return ary - -# }}} - - -# {{{ MemoryObject - -class MemoryObject(MemoryObjectHolder): - def __init__(self, hostbuf=None): - self.__hostbuf = hostbuf - - def _handle_buf_flags(self, flags): - if self.__hostbuf is None: - return _ffi.NULL, 0, None - if not mem_flags._use_host(flags): - warnings.warn("'hostbuf' was passed, but no memory flags " - "to make use of it.") - - need_retain = mem_flags._hold_host(flags) - c_hostbuf, hostbuf_size, retained_buf = _c_buffer_from_obj( - self.__hostbuf, writable=mem_flags._host_writable(flags), - retain=need_retain) - if need_retain: - self.__retained_buf = retained_buf - return c_hostbuf, hostbuf_size, retained_buf - - @property - def hostbuf(self): - return self.__hostbuf - - def release(self): - _handle_error(_lib.memory_object__release(self.ptr)) - -# }}} - - -# {{{ MemoryMap - -class MemoryMap(_Common): - """ - This class may also be used as a context manager in a ``with`` 
statement. - The memory corresponding to this object will be unmapped when - this object is deleted or :meth:`release` is called. - - .. automethod:: release - """ - - @classmethod - def _create(cls, ptr, shape, typestr, strides): - self = _Common._create.__func__(cls, ptr) - self.__array_interface__ = { - 'shape': shape, - 'typestr': typestr, - 'strides': strides, - 'data': (int(_lib.clobj__int_ptr(self.ptr)), False), - 'version': 3 - } - return self - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.release() - - def release(self, queue=None, wait_for=None): - c_wait_for, num_wait_for = _clobj_list(wait_for) - _event = _ffi.new('clobj_t*') - _handle_error(_lib.memory_map__release( - self.ptr, queue.ptr if queue is not None else _ffi.NULL, - c_wait_for, num_wait_for, _event)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _c_buffer_from_obj - -if _PYPY: - # Convert a Python object to a tuple (ptr, num_bytes, ref) to be able to - # pass a data stream to a C function where @ptr can be passed to a pointer - # argument and @num_bytes is the number of bytes. For certain types or - # when @writable or @retain is True, @ref is the object which keep the - # pointer converted from @ptr object valid. 
- - def _c_buffer_from_obj(obj, writable=False, retain=False): - if isinstance(obj, bytes): - if writable: - # bytes is not writable - raise TypeError('expected an object with a writable ' - 'buffer interface.') - if retain: - buf = _ffi.new('char[]', obj) - return (buf, len(obj), buf) - return (obj, len(obj), obj) - elif isinstance(obj, np.ndarray): - # numpy array - return (_ffi.cast('void*', obj.__array_interface__['data'][0]), - obj.nbytes, obj) - elif isinstance(obj, np.generic): - if writable or retain: - raise TypeError('expected an object with a writable ' - 'buffer interface.') - - return (_ffi.cast('void*', memoryview(obj)._pypy_raw_address()), - obj.itemsize, obj) - else: - raise LogicError("PyOpencl on PyPy only accepts numpy arrays " - "and scalars arguments", status_code.INVALID_VALUE) - -elif sys.version_info >= (2, 7, 4): - import ctypes - try: - # Python 2.6 doesn't have this. - _ssize_t = ctypes.c_ssize_t - except AttributeError: - _ssize_t = ctypes.c_size_t - - def _c_buffer_from_obj(obj, writable=False, retain=False): - # {{{ try the numpy array interface first - - # avoid slow ctypes-based buffer interface wrapper - - ary_intf = getattr(obj, "__array_interface__", None) - if ary_intf is not None: - buf_base, is_read_only = ary_intf["data"] - return ( - _ffi.cast('void*', buf_base + ary_intf.get("offset", 0)), - obj.nbytes, - obj) - - # }}} - - # {{{ fall back to the old CPython buffer protocol API - - from pyopencl._buffers import Py_buffer, PyBUF_ANY_CONTIGUOUS, PyBUF_WRITABLE - - flags = PyBUF_ANY_CONTIGUOUS - if writable: - flags |= PyBUF_WRITABLE - - with Py_buffer.from_object(obj, flags) as buf: - return _ffi.cast('void*', buf.buf), buf.len, obj - - # }}} - -else: - # Py2.6 and below - - import ctypes - try: - # Python 2.6 doesn't have this. 
- _ssize_t = ctypes.c_ssize_t - except AttributeError: - _ssize_t = ctypes.c_size_t - - def _c_buffer_from_obj(obj, writable=False, retain=False): - # {{{ fall back to the old CPython buffer protocol API - - addr = ctypes.c_void_p() - length = _ssize_t() - - try: - if writable: - ctypes.pythonapi.PyObject_AsWriteBuffer( - ctypes.py_object(obj), ctypes.byref(addr), - ctypes.byref(length)) - else: - ctypes.pythonapi.PyObject_AsReadBuffer( - ctypes.py_object(obj), ctypes.byref(addr), - ctypes.byref(length)) - - # ctypes check exit status of these, so no need to check - # for errors. - except TypeError: - raise LogicError(routine=None, code=status_code.INVALID_VALUE, - msg=("un-sized (pure-Python) types not " - "acceptable as arguments")) - # }}} - - return _ffi.cast('void*', addr.value), length.value, obj - -# }}} - - -# {{{ Buffer - -class Buffer(MemoryObject): - _id = 'buffer' - - def __init__(self, context, flags, size=0, hostbuf=None): - MemoryObject.__init__(self, hostbuf) - c_hostbuf, hostbuf_size, retained_buf = self._handle_buf_flags(flags) - if hostbuf is not None: - if size > hostbuf_size: - raise RuntimeError("Specified size is greater than host " - "buffer size", - status_code.INVALID_VALUE, "Buffer") - if size == 0: - size = hostbuf_size - - ptr_buffer = _ffi.new('clobj_t*') - _handle_error(_lib.create_buffer( - ptr_buffer, context.ptr, flags, size, c_hostbuf)) - self.ptr = ptr_buffer[0] - - def get_sub_region(self, origin, size, flags=0): - _sub_buf = _ffi.new('clobj_t*') - _handle_error(_lib.buffer__get_sub_region(_sub_buf, self.ptr, origin, - size, flags)) - sub_buf = self._create(_sub_buf[0]) - MemoryObject.__init__(sub_buf, None) - return sub_buf - - def __getitem__(self, idx): - if not isinstance(idx, slice): - raise TypeError("buffer subscript must be a slice object") - - start, stop, stride = idx.indices(self.size) - if stride != 1: - raise ValueError("Buffer slice must have stride 1", - status_code.INVALID_VALUE, "Buffer.__getitem__") - - assert 
start <= stop - - size = stop - start - return self.get_sub_region(start, size) - -# }}} - - -# {{{ SVMAllocation - -class SVMAllocation(object): - """An object whose lifetime is tied to an allocation of shared virtual memory. - - .. note:: - - Most likely, you will not want to use this directly, but rather - :func:`svm_empty` and related functions which allow access to this - functionality using a friendlier, more Pythonic interface. - - .. versionadded:: 2016.2 - - .. automethod:: __init__(self, ctx, size, alignment, flags=None) - .. automethod:: release - .. automethod:: enqueue_release - """ - def __init__(self, ctx, size, alignment, flags, _interface=None): - """ - :arg ctx: a :class:`Context` - :arg flags: some of :class:`svm_mem_flags`. - """ - - self.ptr = None - - ptr = _ffi.new('void**') - _handle_error(_lib.svm_alloc( - ctx.ptr, flags, size, alignment, - ptr)) - - self.ctx = ctx - self.ptr = ptr[0] - self.is_fine_grain = flags & svm_mem_flags.SVM_FINE_GRAIN_BUFFER - - if _interface is not None: - read_write = ( - flags & mem_flags.WRITE_ONLY != 0 - or flags & mem_flags.READ_WRITE != 0) - _interface["data"] = ( - int(_ffi.cast("intptr_t", self.ptr)), not read_write) - self.__array_interface__ = _interface - - def __del__(self): - if self.ptr is not None: - self.release() - - def release(self): - _handle_error(_lib.svm_free(self.ctx.ptr, self.ptr)) - self.ptr = None - - def enqueue_release(self, queue, wait_for=None): - """ - :arg flags: a combination of :class:`pyopencl.map_flags` - :returns: a :class:`pyopencl.Event` - - |std-enqueue-blurb| - """ - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_free( - ptr_event, queue.ptr, 1, self.ptr, - c_wait_for, num_wait_for)) - - self.ctx = None - self.ptr = None - - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ SVM - -# TODO add clSetKernelExecInfo - -class SVM(_CLKernelArg): - """Tags an object exhibiting the Python buffer interface 
(such as a - :class:`numpy.ndarray`) as referring to shared virtual memory. - - Depending on the features of the OpenCL implementation, the following - types of objects may be passed to/wrapped in this type: - - * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` - for any implementation of OpenCL 2.0. - - This is how coarse-grain SVM may be used from both host and device:: - - svm_ary = cl.SVM(cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) - assert isinstance(svm_ary.mem, np.ndarray) - - with svm_ary.map_rw(queue) as ary: - ary.fill(17) # use from host - - prg.twice(queue, svm_ary.mem.shape, None, svm_ary) - - * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, - if the implementation supports fine-grained shared virtual memory. - This memory may directly be passed to a kernel:: - - ary = cl.fsvm_empty(ctx, 1000, np.float32) - assert isinstance(ary, np.ndarray) - - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host - - Observe how mapping (as needed in coarse-grain SVM) is no longer - necessary. - - * any :class:`numpy.ndarray` (or other Python object with a buffer - interface) if the implementation supports fine-grained *system* shared - virtual memory. - - This is how plain :mod:`numpy` arrays may directly be passed to a - kernel:: - - ary = np.zeros(1000, np.float32) - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host - - Objects of this type may be passed to kernel calls and :func:`enqueue_copy`. - Coarse-grain shared-memory *must* be mapped into host address space using - :meth:`map` before being accessed through the :mod:`numpy` interface. - - .. note:: - - This object merely serves as a 'tag' that changes the behavior - of functions to which it is passed. It has no special management - relationship to the memory it tags. 
For example, it is permissible - to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one - :class:`SVM` instance and use the array to construct another. - Neither of the tags need to be kept alive. - - .. versionadded:: 2016.2 - - .. attribute:: mem - - The wrapped object. - - .. automethod:: __init__ - .. automethod:: map - .. automethod:: map_ro - .. automethod:: map_rw - .. automethod:: as_buffer - """ - - def __init__(self, mem): - self.mem = mem - - def map(self, queue, flags, is_blocking=True, wait_for=None): - """ - :arg is_blocking: If *False*, subsequent code must wait on - :attr:`SVMMap.event` in the returned object before accessing the - mapped memory. - :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to - read-write. - :returns: an :class:`SVMMap` instance - - |std-enqueue-blurb| - """ - writable = bool( - flags & (map_flags.WRITE | map_flags.WRITE_INVALIDATE_REGION)) - c_buf, size, _ = _c_buffer_from_obj(self.mem, writable=writable) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_map( - ptr_event, queue.ptr, is_blocking, flags, - c_buf, size, - c_wait_for, num_wait_for)) - - evt = Event._create(ptr_event[0]) - return SVMMap(self, queue, evt) - - def map_ro(self, queue, is_blocking=True, wait_for=None): - """Like :meth:`map`, but with *flags* set for a read-only map.""" - - return self.map(queue, map_flags.READ, - is_blocking=is_blocking, wait_for=wait_for) - - def map_rw(self, queue, is_blocking=True, wait_for=None): - """Like :meth:`map`, but with *flags* set for a read-only map.""" - - return self.map(queue, map_flags.READ | map_flags.WRITE, - is_blocking=is_blocking, wait_for=wait_for) - - def _enqueue_unmap(self, queue, wait_for=None): - c_buf, _, _ = _c_buffer_from_obj(self.mem) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_unmap( - ptr_event, queue.ptr, - c_buf, - c_wait_for, 
num_wait_for)) - - return Event._create(ptr_event[0]) - - def as_buffer(self, ctx, flags=None): - """ - :arg ctx: a :class:`Context` - :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to - read-write. - :returns: a :class:`Buffer` corresponding to *self*. - - The memory referred to by this object must not be freed before - the returned :class:`Buffer` is released. - """ - - if flags is None: - flags = mem_flags.READ_WRITE - - return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) - - -def _enqueue_svm_memcpy(queue, dst, src, size=None, - wait_for=None, is_blocking=True): - dst_buf, dst_size, _ = _c_buffer_from_obj(dst, writable=True) - src_buf, src_size, _ = _c_buffer_from_obj(src, writable=False) - - if size is None: - size = min(dst_size, src_size) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memcpy( - ptr_event, queue.ptr, bool(is_blocking), - dst_buf, src_buf, size, - c_wait_for, num_wait_for, - NannyEvent._handle((dst_buf, src_buf)))) - - return NannyEvent._create(ptr_event[0]) - - -def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): - """Fill shared virtual memory with a pattern. - - :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object - :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray` with the - fill pattern to be used. - :arg byte_count: The size of the memory to be fill. Defaults to the - entirety of *dest*. - - |std-enqueue-blurb| - - .. versionadded:: 2016.2 - """ - - if isinstance(dest, SVM): - dest = dest.mem - - dst_buf, dst_size, _ = _c_buffer_from_obj(dest, writable=True) - pattern_buf, pattern_size, _ = _c_buffer_from_obj(pattern, writable=False) - - if byte_count is None: - byte_count = dst_size - - # pattern is copied, no need to nanny. 
- ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memfill( - ptr_event, queue.ptr, - dst_buf, pattern_buf, pattern_size, byte_count, - c_wait_for, num_wait_for)) - - return Event._create(ptr_event[0]) - - -def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): - """ - :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` - arrrays), optionally wrapped in :class:`SVM` objects. - :arg flags: a combination of :class:`mem_migration_flags` - - |std-enqueue-blurb| - - .. versionadded:: 2016.2 - - This function requires OpenCL 2.1. - """ - - svm_pointers = _ffi.new('void *', len(svms)) - sizes = _ffi.new('size_t', len(svms)) - - for i, svm in enumerate(svms): - if isinstance(svm, SVM): - svm = svm.mem - - buf, size, _ = _c_buffer_from_obj(svm, writable=False) - svm_pointers[i] = buf - sizes[i] = size - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memfill( - ptr_event, queue.ptr, - len(svms), svm_pointers, sizes, flags, - c_wait_for, num_wait_for)) - - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ SVMMap - -class SVMMap(_CLKernelArg): - """ - .. attribute:: event - - .. versionadded:: 2016.2 - - .. automethod:: release - - This class may also be used as a context manager in a ``with`` statement. - :meth:`release` will be called upon exit from the ``with`` region. - The value returned to the ``as`` part of the context manager is the - mapped Python object (e.g. a :mod:`numpy` array). - """ - def __init__(self, svm, queue, event): - self.svm = svm - self.queue = queue - self.event = event - - def __del__(self): - if self.svm is not None: - self.release() - - def __enter__(self): - return self.svm.mem - - def __exit__(self, exc_type, exc_val, exc_tb): - self.release() - - def release(self, queue=None, wait_for=None): - """ - :arg queue: a :class:`pyopencl.CommandQueue`. 
Defaults to the one - with which the map was created, if not specified. - :returns: a :class:`pyopencl.Event` - - |std-enqueue-blurb| - """ - - evt = self.svm._enqueue_unmap(self.queue) - self.svm = None - - return evt - -# }}} - - -# {{{ Program - -class CompilerWarning(UserWarning): - pass - - -def compiler_output(text): - import os - from warnings import warn - if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): - warn(text, CompilerWarning) - else: - warn("Non-empty compiler output encountered. Set the " - "environment variable PYOPENCL_COMPILER_OUTPUT=1 " - "to see more.", CompilerWarning) - - -class _Program(_Common): - _id = 'program' - - def __init__(self, *args): - if len(args) == 2: - ctx, source = args - from pyopencl.tools import is_spirv - if is_spirv(source): - self._init_il(ctx, source) - else: - self._init_source(ctx, source) - else: - self._init_binary(*args) - - def _init_source(self, context, src): - ptr_program = _ffi.new('clobj_t*') - _handle_error(_lib.create_program_with_source( - ptr_program, context.ptr, _to_cstring(src))) - self.ptr = ptr_program[0] - - def _init_il(self, context, il): - ptr_program = _ffi.new('clobj_t*') - _handle_error(_lib.create_program_with_il( - ptr_program, context.ptr, il, len(il))) - self.ptr = ptr_program[0] - - def _init_binary(self, context, devices, binaries): - if len(devices) != len(binaries): - raise RuntimeError("device and binary counts don't match", - status_code.INVALID_VALUE, - "create_program_with_binary") - - ptr_program = _ffi.new('clobj_t*') - ptr_devices, num_devices = _clobj_list(devices) - ptr_binaries = [_ffi.new('unsigned char[]', binary) - for binary in binaries] - binary_sizes = [len(b) for b in binaries] - - # TODO parameter order? 
(for clobj_list) - _handle_error(_lib.create_program_with_binary( - ptr_program, context.ptr, num_devices, ptr_devices, - ptr_binaries, binary_sizes)) - - self.ptr = ptr_program[0] - - def kind(self): - kind = _ffi.new('int*') - _handle_error(_lib.program__kind(self.ptr, kind)) - return kind[0] - - def _build(self, options=None, devices=None): - if options is None: - options = b"" - # TODO? reverse parameter order - ptr_devices, num_devices = _clobj_list(devices) - _handle_error(_lib.program__build(self.ptr, options, - num_devices, ptr_devices)) - - def get_build_info(self, device, param): - info = _ffi.new('generic_info *') - _handle_error(_lib.program__get_build_info( - self.ptr, device.ptr, param, info)) - return _generic_info_to_python(info) - - def compile(self, options="", devices=None, headers=[]): - _devs, num_devs = _clobj_list(devices) - _prgs, names = list(zip(*((prg.ptr, _to_cstring(name)) - for (name, prg) in headers))) - _handle_error(_lib.program__compile( - self.ptr, _to_cstring(options), _devs, num_devs, - _prgs, names, len(names))) - - @classmethod - def link(cls, context, programs, options="", devices=None): - _devs, num_devs = _clobj_list(devices) - _prgs, num_prgs = _clobj_list(programs) - _prg = _ffi.new('clobj_t*') - _handle_error(_lib.program__link( - _prg, context.ptr, _prgs, num_prgs, _to_cstring(options), - _devs, num_devs)) - return cls._create(_prg[0]) - - @classmethod - def create_with_builtin_kernels(cls, context, devices, kernel_names): - _devs, num_devs = _clobj_list(devices) - _prg = _ffi.new('clobj_t*') - _handle_error(_lib.program__create_with_builtin_kernels( - _prg, context.ptr, _devs, num_devs, _to_cstring(kernel_names))) - return cls._create(_prg[0]) - - def all_kernels(self): - knls = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.program__all_kernels( - self.ptr, knls.ptr, knls.size)) - return [ - Kernel - ._create(knls.ptr[0][i]) - ._setup(self) - for i in range(knls.size[0])] - - def _get_build_logs(self): - 
build_logs = [] - for dev in self.get_info(program_info.DEVICES): - try: - log = self.get_build_info(dev, program_build_info.LOG) - except Exception: - log = "" - - build_logs.append((dev, log)) - - return build_logs - - def build(self, options_bytes, devices=None): - logger.debug("build program: start") - err = None - try: - self._build(options=options_bytes, devices=devices) - except Error as e: - msg = e.what + "\n\n" + (75 * "=" + "\n").join( - "Build on %s:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs()) - code = e.code - routine = e.routine - - err = RuntimeError( - Error._ErrorRecord( - msg=msg, - code=code, - routine=routine)) - - if err is not None: - # Python 3.2 outputs the whole list of currently active exceptions - # This serves to remove one (redundant) level from that nesting. - - logger.debug("build program: completed, error") - raise err - - logger.debug("build program: completed, success") - - message = (75 * "=" + "\n").join( - "Build on %s succeeded, but said:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs() - if log is not None and log.strip()) - - if message: - if self.kind() == program_kind.SOURCE: - build_type = "From-source build" - elif self.kind() == program_kind.BINARY: - build_type = "From-binary build" - else: - build_type = "Build" - - compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" - % (build_type, message)) - - return self - -# }}} - - -class LocalMemory(_CLKernelArg): - __slots__ = ('_size',) - - def __init__(self, size): - self._size = size - - @property - def size(self): - return self._size - - -# {{{ Kernel - -class Kernel(_Common): - _id = 'kernel' - - def __init__(self, program, name): - if not isinstance(program, _Program): - program = program._get_prg() - - ptr_kernel = _ffi.new('clobj_t*') - _handle_error(_lib.create_kernel(ptr_kernel, program.ptr, - _to_cstring(name))) - self.ptr = ptr_kernel[0] - - self._setup(program) - - def _setup(self, prg): - self._source = getattr(prg, 
"_source", None) - - from pyopencl.invoker import generate_enqueue_and_set_args - self._enqueue, self._set_args = generate_enqueue_and_set_args( - self.function_name, self.num_args, self.num_args, - None, - warn_about_arg_count_bug=None, - work_around_arg_count_bug=None) - - self._wg_info_cache = {} - return self - - def set_scalar_arg_dtypes(self, scalar_arg_dtypes): - self._scalar_arg_dtypes = tuple(scalar_arg_dtypes) - - # {{{ arg counting bug handling - - # For example: - # https://github.com/pocl/pocl/issues/197 - # (but Apple CPU has a similar bug) - - work_around_arg_count_bug = False - warn_about_arg_count_bug = False - - from pyopencl.characterize import has_struct_arg_count_bug - - count_bug_per_dev = [ - has_struct_arg_count_bug(dev, self.context) - for dev in self.context.devices] - - from pytools import single_valued - if any(count_bug_per_dev): - if all(count_bug_per_dev): - work_around_arg_count_bug = single_valued(count_bug_per_dev) - else: - warn_about_arg_count_bug = True - - # }}} - - from pyopencl.invoker import generate_enqueue_and_set_args - self._enqueue, self._set_args = generate_enqueue_and_set_args( - self.function_name, - len(scalar_arg_dtypes), self.num_args, - self._scalar_arg_dtypes, - warn_about_arg_count_bug=warn_about_arg_count_bug, - work_around_arg_count_bug=work_around_arg_count_bug) - - def set_args(self, *args, **kwargs): - # Need to duplicate the 'self' argument for dynamically generated method - return self._set_args(self, *args, **kwargs) - - def __call__(self, queue, global_size, local_size, *args, **kwargs): - # __call__ can't be overridden directly, so we need this - # trampoline hack. 
- return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) - - def capture_call(self, filename, queue, global_size, local_size, - *args, **kwargs): - from pyopencl.capture_call import capture_kernel_call - capture_kernel_call(self, filename, queue, global_size, local_size, - *args, **kwargs) - - def _set_arg_clkernelarg(self, arg_index, arg): - if isinstance(arg, MemoryObjectHolder): - _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr)) - elif isinstance(arg, SVM): - c_buf, _, _ = _c_buffer_from_obj(arg.mem) - _handle_error(_lib.kernel__set_arg_svm_pointer( - self.ptr, arg_index, c_buf)) - elif isinstance(arg, Sampler): - _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index, - arg.ptr)) - elif isinstance(arg, LocalMemory): - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - _ffi.NULL, arg.size)) - else: - raise RuntimeError("unexpected _CLKernelArg subclass" - "dimensions", status_code.INVALID_VALUE, - "clSetKernelArg") - - def set_arg(self, arg_index, arg): - # If you change this, also change the kernel call generation logic. 
- if arg is None: - _handle_error(_lib.kernel__set_arg_null(self.ptr, arg_index)) - elif isinstance(arg, _CLKernelArg): - self._set_arg_clkernelarg(arg_index, arg) - elif _CPY2 and isinstance(arg, np.generic): - # https://github.com/numpy/numpy/issues/5381 - c_buf, size, _ = _c_buffer_from_obj(np.getbuffer(arg)) - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - c_buf, size)) - else: - c_buf, size, _ = _c_buffer_from_obj(arg) - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - c_buf, size)) - - def get_work_group_info(self, param, device): - try: - return self._wg_info_cache[param, device] - except KeyError: - pass - - info = _ffi.new('generic_info*') - _handle_error(_lib.kernel__get_work_group_info( - self.ptr, param, device.ptr, info)) - result = _generic_info_to_python(info) - - self._wg_info_cache[param, device] = result - return result - - def get_arg_info(self, idx, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.kernel__get_arg_info(self.ptr, idx, param, info)) - return _generic_info_to_python(info) - -# }}} - - -# {{{ Event - -class Event(_Common): - _id = 'event' - - def __init__(self): - pass - - def get_profiling_info(self, param): - info = _ffi.new('generic_info *') - _handle_error(_lib.event__get_profiling_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - def wait(self): - _handle_error(_lib.event__wait(self.ptr)) - - def set_callback(self, _type, cb): - def _func(status): - cb(status) - _handle_error(_lib.event__set_callback(self.ptr, _type, - _ffi.new_handle(_func))) - - -class ProfilingInfoGetter: - def __init__(self, event): - self.event = event - - def __getattr__(self, name): - info_cls = profiling_info - - try: - inf_attr = getattr(info_cls, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_profiling_info(inf_attr) - - -Event.profile = property(ProfilingInfoGetter) - - -def 
wait_for_events(wait_for): - if wait_for is None or len(wait_for) == 0: - return - _handle_error(_lib.wait_for_events(*_clobj_list(wait_for))) - - -class NannyEvent(Event): - class _Data(object): - __slots__ = ('ward', 'ref') - - def __init__(self, ward, ref): - self.ward = ward - self.ref = ref - - @classmethod - def _handle(cls, ward, ref=None): - return _ffi.new_handle(cls._Data(ward, ref)) - - def get_ward(self): - _handle = _lib.nanny_event__get_ward(self.ptr) - if _handle == _ffi.NULL: - return - return _ffi.from_handle(_handle).ward - - -class UserEvent(Event): - def __init__(self, ctx): - _evt = _ffi.new('clobj_t*') - _handle_error(_lib.create_user_event(_evt, ctx.ptr)) - self.ptr = _evt[0] - - def set_status(self, status): - _handle_error(_lib.user_event__set_status(self.ptr, status)) - -# }}} - - -# {{{ enqueue_nd_range_kernel - -def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, - global_work_offset=None, wait_for=None, - g_times_l=False): - - work_dim = len(global_work_size) - - if local_work_size is not None: - if g_times_l: - work_dim = max(work_dim, len(local_work_size)) - elif work_dim != len(local_work_size): - raise RuntimeError("global/local work sizes have differing " - "dimensions", status_code.INVALID_VALUE, - "enqueue_nd_range_kernel") - - if len(local_work_size) < work_dim: - local_work_size = (local_work_size + - (1,) * (work_dim - len(local_work_size))) - if len(global_work_size) < work_dim: - global_work_size = (global_work_size + - (1,) * (work_dim - len(global_work_size))) - if g_times_l: - global_work_size = tuple( - global_work_size[i] * local_work_size[i] - for i in range(work_dim)) - - c_global_work_offset = _ffi.NULL - if global_work_offset is not None: - if work_dim != len(global_work_offset): - raise RuntimeError("global work size and offset have differing " - "dimensions", status_code.INVALID_VALUE, - "enqueue_nd_range_kernel") - - c_global_work_offset = global_work_offset - - if local_work_size is 
None: - local_work_size = _ffi.NULL - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_nd_range_kernel( - ptr_event, queue.ptr, kernel.ptr, work_dim, c_global_work_offset, - global_work_size, local_work_size, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ enqueue_task - -def enqueue_task(queue, kernel, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_task( - _event, queue.ptr, kernel.ptr, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_marker_* - -def _enqueue_marker_with_wait_list(queue, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_marker_with_wait_list( - ptr_event, queue.ptr, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - -def _enqueue_marker(queue): - ptr_event = _ffi.new('clobj_t*') - _handle_error(_lib.enqueue_marker(ptr_event, queue.ptr)) - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ _enqueue_barrier_* - -def _enqueue_barrier_with_wait_list(queue, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_barrier_with_wait_list( - ptr_event, queue.ptr, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - -def _enqueue_barrier(queue): - _handle_error(_lib.enqueue_barrier(queue.ptr)) - -# }}} - - -# {{{ enqueue_migrate_mem_object* - -def enqueue_migrate_mem_objects(queue, mem_objects, flags, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - c_mem_objs, num_mem_objs = _clobj_list(mem_objects) - _handle_error(_lib.enqueue_migrate_mem_objects( - _event, queue.ptr, c_mem_objs, num_mem_objs, flags, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def 
enqueue_migrate_mem_object_ext(queue, mem_objects, flags, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - c_mem_objs, num_mem_objs = _clobj_list(mem_objects) - _handle_error(_lib.enqueue_migrate_mem_object_ext( - _event, queue.ptr, c_mem_objs, num_mem_objs, flags, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_wait_for_events - -def _enqueue_wait_for_events(queue, wait_for=None): - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_wait_for_events(queue.ptr, c_wait_for, - num_wait_for)) - -# }}} - - -# {{{ _enqueue_*_buffer - -def _enqueue_read_buffer(queue, mem, hostbuf, device_offset=0, - wait_for=None, is_blocking=True): - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_read_buffer( - ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking), - NannyEvent._handle(hostbuf))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, - wait_for=None, is_blocking=True): - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_write_buffer( - ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking), - NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, - dst_offset=0, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer( - ptr_event, queue.ptr, src.ptr, dst.ptr, byte_count, src_offset, - dst_offset, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - 
-def _enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, - host_origin, region, buffer_pitches=None, - host_pitches=None, wait_for=None, - is_blocking=True): - buffer_origin = tuple(buffer_origin) - host_origin = tuple(host_origin) - region = tuple(region) - if buffer_pitches is None: - buffer_pitches = _ffi.NULL - buffer_pitches_l = 0 - else: - buffer_pitches = tuple(buffer_pitches) - buffer_pitches_l = len(buffer_pitches) - if host_pitches is None: - host_pitches = _ffi.NULL - host_pitches_l = 0 - else: - host_pitches = tuple(host_pitches) - host_pitches_l = len(host_pitches) - - buffer_origin_l = len(buffer_origin) - host_origin_l = len(host_origin) - region_l = len(region) - if (buffer_origin_l > 3 or host_origin_l > 3 or region_l > 3 or - buffer_pitches_l > 2 or host_pitches_l > 2): - raise RuntimeError("(buffer/host)_origin, (buffer/host)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_read_buffer_rect") - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_read_buffer_rect( - _event, queue.ptr, mem.ptr, c_buf, buffer_origin, buffer_origin_l, - host_origin, host_origin_l, region, region_l, buffer_pitches, - buffer_pitches_l, host_pitches, host_pitches_l, c_wait_for, - num_wait_for, bool(is_blocking), NannyEvent._handle(hostbuf))) - return NannyEvent._create(_event[0]) - - -def _enqueue_write_buffer_rect(queue, mem, hostbuf, buffer_origin, - host_origin, region, buffer_pitches=None, - host_pitches=None, wait_for=None, - is_blocking=True): - buffer_origin = tuple(buffer_origin) - host_origin = tuple(host_origin) - region = tuple(region) - if buffer_pitches is None: - buffer_pitches = _ffi.NULL - buffer_pitches_l = 0 - else: - buffer_pitches = tuple(buffer_pitches) - buffer_pitches_l = len(buffer_pitches) - if host_pitches is None: - host_pitches = _ffi.NULL - host_pitches_l = 0 - else: - 
host_pitches = tuple(host_pitches) - host_pitches_l = len(host_pitches) - - buffer_origin_l = len(buffer_origin) - host_origin_l = len(host_origin) - region_l = len(region) - if (buffer_origin_l > 3 or host_origin_l > 3 or region_l > 3 or - buffer_pitches_l > 2 or host_pitches_l > 2): - raise RuntimeError("(buffer/host)_origin, (buffer/host)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_write_buffer_rect") - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_write_buffer_rect( - _event, queue.ptr, mem.ptr, c_buf, buffer_origin, buffer_origin_l, - host_origin, host_origin_l, region, region_l, buffer_pitches, - buffer_pitches_l, host_pitches, host_pitches_l, c_wait_for, - num_wait_for, bool(is_blocking), NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(_event[0]) - - -def _enqueue_copy_buffer_rect(queue, src, dst, src_origin, dst_origin, region, - src_pitches=None, dst_pitches=None, - wait_for=None): - src_origin = tuple(src_origin) - dst_origin = tuple(dst_origin) - region = tuple(region) - if src_pitches is None: - src_pitches = _ffi.NULL - src_pitches_l = 0 - else: - src_pitches = tuple(src_pitches) - src_pitches_l = len(src_pitches) - if dst_pitches is None: - dst_pitches = _ffi.NULL - dst_pitches_l = 0 - else: - dst_pitches = tuple(dst_pitches) - dst_pitches_l = len(dst_pitches) - src_origin_l = len(src_origin) - dst_origin_l = len(dst_origin) - region_l = len(region) - if (src_origin_l > 3 or dst_origin_l > 3 or region_l > 3 or - src_pitches_l > 2 or dst_pitches_l > 2): - raise RuntimeError("(src/dst)_origin, (src/dst)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_buffer_rect") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer_rect( - _event, queue.ptr, src.ptr, 
dst.ptr, src_origin, src_origin_l, - dst_origin, dst_origin_l, region, region_l, src_pitches, - src_pitches_l, dst_pitches, dst_pitches_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -# PyPy bug report: https://bitbucket.org/pypy/pypy/issue/1777/unable-to-create-proper-numpy-array-from # noqa -def enqueue_map_buffer(queue, buf, flags, offset, shape, dtype, - order="C", strides=None, wait_for=None, - is_blocking=True): - dtype, shape, strides = _norm_shape_dtype(shape, dtype, order, strides, - 'enqueue_map_buffer') - byte_size = dtype.itemsize - for s in shape: - byte_size *= s - c_wait_for, num_wait_for = _clobj_list(wait_for) - _event = _ffi.new('clobj_t*') - _map = _ffi.new('clobj_t*') - _handle_error(_lib.enqueue_map_buffer(_event, _map, queue.ptr, buf.ptr, - flags, offset, byte_size, c_wait_for, - num_wait_for, bool(is_blocking))) - mmap = MemoryMap._create(_map[0], shape, dtype.str, strides) - ary = np.asarray(mmap) - ary.dtype = dtype - - return (ary, Event._create(_event[0])) - - -def _enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): - c_pattern, psize, c_ref = _c_buffer_from_obj(pattern) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_fill_buffer( - _event, queue.ptr, mem.ptr, c_pattern, psize, offset, size, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_*_image - -def _enqueue_read_image(queue, mem, origin, region, hostbuf, row_pitch=0, - slice_pitch=0, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_read_image") - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - # TODO check buffer size - 
_handle_error(_lib.enqueue_read_image( - ptr_event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, - c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), NannyEvent._handle(hostbuf))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_copy_image(queue, src, dest, src_origin, dest_origin, region, - wait_for=None): - src_origin = tuple(src_origin) - region = tuple(region) - src_origin_l = len(src_origin) - dest_origin_l = len(dest_origin) - region_l = len(region) - if src_origin_l > 3 or dest_origin_l > 3 or region_l > 3: - raise RuntimeError("(src/dest)origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_copy_image") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_image( - _event, queue.ptr, src.ptr, dest.ptr, src_origin, src_origin_l, - dest_origin, dest_origin_l, region, region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_write_image(queue, mem, origin, region, hostbuf, row_pitch=0, - slice_pitch=0, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_write_image") - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - # TODO: check buffer size - _handle_error(_lib.enqueue_write_image( - _event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, - c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(_event[0]) - - -def enqueue_map_image(queue, img, flags, origin, region, shape, dtype, - order="C", strides=None, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = 
tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_map_image") - dtype, shape, strides = _norm_shape_dtype(shape, dtype, order, strides, - 'enqueue_map_image') - _event = _ffi.new('clobj_t*') - _map = _ffi.new('clobj_t*') - _row_pitch = _ffi.new('size_t*') - _slice_pitch = _ffi.new('size_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_map_image(_event, _map, queue.ptr, img.ptr, - flags, origin, origin_l, region, - region_l, _row_pitch, _slice_pitch, - c_wait_for, num_wait_for, is_blocking)) - mmap = MemoryMap._create(_map[0], shape, dtype.str, strides) - ary = np.asarray(mmap) - ary.dtype = dtype - return (ary, Event._create(_event[0]), _row_pitch[0], _slice_pitch[0]) - - -def enqueue_fill_image(queue, img, color, origin, region, wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - color_l = len(color) - if origin_l > 3 or region_l > 3 or color_l > 4: - raise RuntimeError("origin, region or color has too many components", - status_code.INVALID_VALUE, "enqueue_fill_image") - color = np.array(color).astype(img._fill_type) - c_color = _ffi.cast('void*', color.__array_interface__['data'][0]) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_fill_image(_event, queue.ptr, img.ptr, - c_color, origin, origin_l, region, - region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_copy_image_to_buffer(queue, src, dest, origin, region, offset, - wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_image_to_buffer") - _event = 
_ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_image_to_buffer( - _event, queue.ptr, src.ptr, dest.ptr, origin, origin_l, region, - region_l, offset, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_copy_buffer_to_image(queue, src, dest, offset, origin, region, - wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_buffer_to_image") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer_to_image( - _event, queue.ptr, src.ptr, dest.ptr, offset, origin, origin_l, - region, region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ gl interop - -def have_gl(): - return bool(_lib.have_gl()) - - -class _GLObject(object): - def get_gl_object_info(self): - otype = _ffi.new('cl_gl_object_type*') - gl_name = _ffi.new('GLuint*') - _handle_error(_lib.get_gl_object_info(self.ptr, otype, gl_name)) - return otype[0], gl_name[0] - - -class GLBuffer(MemoryObject, _GLObject): - _id = 'gl_buffer' - - def __init__(self, context, flags, bufobj): - MemoryObject.__init__(self) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_buffer( - ptr, context.ptr, flags, bufobj)) - self.ptr = ptr[0] - - -class GLRenderBuffer(MemoryObject, _GLObject): - _id = 'gl_renderbuffer' - - def __init__(self, context, flags, bufobj): - MemoryObject.__init__(self, bufobj) - c_buf, bufsize, retained = self._handle_buf_flags(flags) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_renderbuffer( - ptr, context.ptr, flags, c_buf)) - self.ptr = ptr[0] - - -def _create_gl_enqueue(what): - def enqueue_gl_objects(queue, mem_objects, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, 
num_wait_for = _clobj_list(wait_for) - c_mem_objects, num_mem_objects = _clobj_list(mem_objects) - _handle_error(what(ptr_event, queue.ptr, c_mem_objects, num_mem_objects, - c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - return enqueue_gl_objects - - -if _lib.have_gl(): - enqueue_acquire_gl_objects = _create_gl_enqueue( - _lib.enqueue_acquire_gl_objects) - enqueue_release_gl_objects = _create_gl_enqueue( - _lib.enqueue_release_gl_objects) - try: - get_apple_cgl_share_group = _lib.get_apple_cgl_share_group - except AttributeError: - pass - -# }}} - - -def _cffi_property(_name=None, read=True, write=True): - def _deco(get_ptr): - name = _name if _name else get_ptr.__name__ - return property((lambda self: getattr(get_ptr(self), name)) if read - else (lambda self: None), - (lambda self, v: setattr(get_ptr(self), name, v)) - if write else (lambda self, v: None)) - return _deco - - -# {{{ ImageFormat - -class ImageFormat(object): - # Hack around fmt.__dict__ check in test_wrapper.py - __dict__ = {} - __slots__ = ('ptr',) - - def __init__(self, channel_order=0, channel_type=0): - self.ptr = _ffi.new("cl_image_format*") - self.channel_order = channel_order - self.channel_data_type = channel_type - - @_cffi_property('image_channel_order') - def channel_order(self): - return self.ptr - - @_cffi_property('image_channel_data_type') - def channel_data_type(self): - return self.ptr - - @property - def channel_count(self): - try: - return { - channel_order.R: 1, - channel_order.A: 1, - channel_order.RG: 2, - channel_order.RA: 2, - channel_order.RGB: 3, - channel_order.RGBA: 4, - channel_order.BGRA: 4, - channel_order.INTENSITY: 1, - channel_order.LUMINANCE: 1, - }[self.channel_order] - except KeyError: - raise LogicError("unrecognized channel order", - status_code.INVALID_VALUE, - "ImageFormat.channel_count") - - @property - def dtype_size(self): - try: - return { - channel_type.SNORM_INT8: 1, - channel_type.SNORM_INT16: 2, - channel_type.UNORM_INT8: 1, - 
channel_type.UNORM_INT16: 2, - channel_type.UNORM_SHORT_565: 2, - channel_type.UNORM_SHORT_555: 2, - channel_type.UNORM_INT_101010: 4, - channel_type.SIGNED_INT8: 1, - channel_type.SIGNED_INT16: 2, - channel_type.SIGNED_INT32: 4, - channel_type.UNSIGNED_INT8: 1, - channel_type.UNSIGNED_INT16: 2, - channel_type.UNSIGNED_INT32: 4, - channel_type.HALF_FLOAT: 2, - channel_type.FLOAT: 4, - }[self.channel_data_type] - except KeyError: - raise LogicError("unrecognized channel data type", - status_code.INVALID_VALUE, - "ImageFormat.channel_dtype_size") - - @property - def itemsize(self): - return self.channel_count * self.dtype_size - - def __repr__(self): - return "ImageFormat(%s, %s)" % ( - channel_order.to_string(self.channel_order, - ""), - channel_type.to_string(self.channel_data_type, - "")) - - def __eq__(self, other): - return (self.channel_order == other.channel_order - and self.channel_data_type == other.channel_data_type) - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash((type(self), self.channel_order, self.channel_data_type)) - - -def get_supported_image_formats(context, flags, image_type): - info = _ffi.new('generic_info*') - _handle_error(_lib.context__get_supported_image_formats( - context.ptr, flags, image_type, info)) - return _generic_info_to_python(info) - -# }}} - - -# {{{ ImageDescriptor - -def _write_only_property(*arg): - return property().setter(*arg) - - -class ImageDescriptor(object): - __slots__ = ('ptr',) - - def __init__(self): - self.ptr = _ffi.new("cl_image_desc*") - - @_cffi_property() - def image_type(self): - return self.ptr - - @_cffi_property('image_array_size') - def array_size(self): - return self.ptr - - @_cffi_property() - def num_mip_levels(self): - return self.ptr - - @_cffi_property() - def num_samples(self): - return self.ptr - - @_write_only_property - def shape(self, shape): - sdims = len(shape) - if sdims > 3: - raise LogicError("shape has too many components", - 
status_code.INVALID_VALUE, "transfer") - desc = self.ptr - desc.image_width = shape[0] if sdims > 0 else 1 - desc.image_height = shape[1] if sdims > 1 else 1 - desc.image_depth = shape[2] if sdims > 2 else 1 - desc.image_array_size = desc.image_depth - - @_write_only_property - def pitches(self, pitches): - pdims = len(pitches) - if pdims > 2: - raise LogicError("pitches has too many components", - status_code.INVALID_VALUE, "transfer") - desc = self.ptr - desc.image_row_pitch = pitches[0] if pdims > 0 else 1 - desc.image_slice_pitch = pitches[1] if pdims > 1 else 1 - - @_write_only_property - def buffer(self, buff): - self.ptr.buffer = buff.ptr.int_ptr if buff else _ffi.NULL - -# }}} - - -# {{{ Image - -_int_dtype = ({ - 8: np.int64, - 4: np.int32, - 2: np.int16, - 1: np.int8, -})[_ffi.sizeof('int')] - -_uint_dtype = ({ - 8: np.uint64, - 4: np.uint32, - 2: np.uint16, - 1: np.uint8, -})[_ffi.sizeof('unsigned')] - -_float_dtype = ({ - 8: np.float64, - 4: np.float32, - 2: np.float16, -})[_ffi.sizeof('float')] - -_fill_dtype_dict = { - _lib.TYPE_INT: _int_dtype, - _lib.TYPE_UINT: _uint_dtype, - _lib.TYPE_FLOAT: _float_dtype, - } - - -class Image(MemoryObject): - _id = 'image' - - def __init_dispatch(self, *args): - if len(args) == 5: - # >= 1.2 - self.__init_1_2(*args) - elif len(args) == 6: - # <= 1.1 - self.__init_legacy(*args) - else: - assert False - self._fill_type = _fill_dtype_dict[_lib.image__get_fill_type(self.ptr)] - - def __init_1_2(self, context, flags, fmt, desc, hostbuf): - MemoryObject.__init__(self, hostbuf) - c_buf, size, retained_buf = self._handle_buf_flags(flags) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_from_desc(ptr, context.ptr, flags, - fmt.ptr, desc.ptr, c_buf)) - self.ptr = ptr[0] - - def __init_legacy(self, context, flags, fmt, shape, pitches, hostbuf): - if shape is None: - raise LogicError("'shape' must be given", - status_code.INVALID_VALUE, "Image") - MemoryObject.__init__(self, hostbuf) - c_buf, size, retained_buf = 
self._handle_buf_flags(flags) - dims = len(shape) - if dims == 2: - width, height = shape - pitch = 0 - if pitches is not None: - try: - pitch, = pitches - except ValueError: - raise LogicError("invalid length of pitch tuple", - status_code.INVALID_VALUE, "Image") - - # check buffer size - if (hostbuf is not None and - max(pitch, width * fmt.itemsize) * height > size): - raise LogicError("buffer too small", - status_code.INVALID_VALUE, "Image") - - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_2d(ptr, context.ptr, flags, fmt.ptr, - width, height, pitch, c_buf)) - self.ptr = ptr[0] - elif dims == 3: - width, height, depth = shape - pitch_x, pitch_y = 0, 0 - if pitches is not None: - try: - pitch_x, pitch_y = pitches - except ValueError: - raise LogicError("invalid length of pitch tuple", - status_code.INVALID_VALUE, "Image") - - # check buffer size - if (hostbuf is not None and - (max(max(pitch_x, width * fmt.itemsize) * - height, pitch_y) * depth > size)): - raise LogicError("buffer too small", - status_code.INVALID_VALUE, "Image") - - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_3d( - ptr, context.ptr, flags, fmt.ptr, - width, height, depth, pitch_x, pitch_y, c_buf)) - - self.ptr = ptr[0] - else: - raise LogicError("invalid dimension", - status_code.INVALID_VALUE, "Image") - - def __init__(self, context, flags, format, shape=None, pitches=None, - hostbuf=None, is_array=False, buffer=None): - - if shape is None and hostbuf is None: - raise Error("'shape' must be passed if 'hostbuf' is not given") - - if shape is None and hostbuf is not None: - shape = hostbuf.shape - - if hostbuf is not None and not \ - (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn - warn("'hostbuf' was passed, but no memory flags to make use of it.") - - if hostbuf is None and pitches is not None: - raise Error("'pitches' may only be given if 'hostbuf' is given") - - if context._get_cl_version() >= (1, 2) and 
get_cl_header_version() >= (1, 2): - if buffer is not None and is_array: - raise ValueError( - "'buffer' and 'is_array' are mutually exclusive") - - if len(shape) == 3: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 3D arrays") - elif is_array: - image_type = mem_object_type.IMAGE2D_ARRAY - else: - image_type = mem_object_type.IMAGE3D - - elif len(shape) == 2: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 2D arrays") - elif is_array: - image_type = mem_object_type.IMAGE1D_ARRAY - else: - image_type = mem_object_type.IMAGE2D - - elif len(shape) == 1: - if buffer is not None: - image_type = mem_object_type.IMAGE1D_BUFFER - elif is_array: - raise TypeError("array of zero-dimensional images not supported") - else: - image_type = mem_object_type.IMAGE1D - - else: - raise ValueError("images cannot have more than three dimensions") - - desc = ImageDescriptor() - - desc.image_type = image_type - desc.shape = shape # also sets desc.array_size - - if pitches is None: - desc.pitches = (0, 0) - else: - desc.pitches = pitches - - desc.num_mip_levels = 0 # per CL 1.2 spec - desc.num_samples = 0 # per CL 1.2 spec - desc.buffer = buffer - - self.__init_dispatch(context, flags, format, desc, hostbuf) - else: - # legacy init for CL 1.1 and older - if is_array: - raise TypeError("'is_array=True' is not supported for CL < 1.2") - # if num_mip_levels is not None: - # raise TypeError( - # "'num_mip_levels' argument is not supported for CL < 1.2") - # if num_samples is not None: - # raise TypeError( - # "'num_samples' argument is not supported for CL < 1.2") - if buffer is not None: - raise TypeError("'buffer' argument is not supported for CL < 1.2") - - self.__init_dispatch(context, flags, format, shape, - pitches, hostbuf) - - def get_image_info(self, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.image__get_image_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - 
@property - def shape(self): - if self.type == mem_object_type.IMAGE2D: - return (self.width, self.height) - elif self.type == mem_object_type.IMAGE3D: - return (self.width, self.height, self.depth) - else: - raise LogicError("only images have shapes") - - -class _ImageInfoGetter: - def __init__(self, event): - from warnings import warn - warn("Image.image.attr is deprecated. " - "Use Image.attr directly, instead.") - - self.event = event - - def __getattr__(self, name): - try: - inf_attr = getattr(image_info, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_image_info(inf_attr) - - -Image.image = property(_ImageInfoGetter) - -# }}} - - -# {{{ Sampler - -class Sampler(_Common, _CLKernelArg): - _id = 'sampler' - - def __init__(self, context, normalized_coords, addressing_mode, filter_mode): - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_sampler( - ptr, context.ptr, normalized_coords, addressing_mode, filter_mode)) - self.ptr = ptr[0] - -# }}} - - -# {{{ GLTexture - -class GLTexture(Image, _GLObject): - _id = 'gl_texture' - - def __init__(self, context, flags, texture_target, miplevel, texture, dims=None): - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_texture( - ptr, context.ptr, flags, texture_target, miplevel, texture)) - self.ptr = ptr[0] - -# }}} - - -# {{{ DeviceTopologyAmd - -class DeviceTopologyAmd(object): - # Hack around fmt.__dict__ check in test_wrapper.py - __dict__ = {} - __slots__ = ('ptr',) - - def __init__(self, bus=0, device=0, function=0): - self.ptr = _ffi.new("cl_device_topology_amd*") - self.bus = bus - self.device = device - self.function = function - - def _check_range(self, value, prop=None): - if (value < -127) or (value > 127): - raise ValueError("Value %s not in range [-127, 127].") - - @_cffi_property('pcie') - def _pcie(self): - return self.ptr - - @property - def bus(self): - return self._pcie.bus - - @bus.setter - 
def bus(self, value): - self._check_range(value) - self._pcie.bus = value - - @property - def device(self): - return self._pcie.device - - @device.setter - def device(self, value): - self._pcie.device = value - - @property - def function(self): - return self._pcie.function - - @function.setter - def function(self, value): - self._pcie.function = value - -# }}} - - -# {{{ get_info monkeypatchery - -def add_get_info_attrs(cls, info_method, info_class, cacheable_attrs=None): - if cacheable_attrs is None: - cacheable_attrs = [] - - def make_getinfo(info_method, info_name, info_attr): - def result(self): - return info_method(self, info_attr) - - return property(result) - - def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): - def result(self): - try: - return getattr(self, cache_attr) - except AttributeError: - pass - - result = info_method(self, info_attr) - setattr(self, cache_attr, result) - return result - - return property(result) - - for info_name, info_value in six.iteritems(info_class.__dict__): - if info_name == "to_string" or info_name.startswith("_"): - continue - - info_lower = info_name.lower() - info_constant = getattr(info_class, info_name) - if info_name in cacheable_attrs: - cache_attr = intern("_info_cache_" + info_lower) - setattr(cls, info_lower, make_cacheable_getinfo( - info_method, info_lower, cache_attr, info_constant)) - else: - setattr(cls, info_lower, make_getinfo( - info_method, info_name, info_constant)) - - -add_get_info_attrs(Platform, Platform.get_info, platform_info), -add_get_info_attrs(Device, Device.get_info, device_info, - ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]) -add_get_info_attrs(Context, Context.get_info, context_info) -add_get_info_attrs(CommandQueue, CommandQueue.get_info, command_queue_info, - ["CONTEXT", "DEVICE"]) -add_get_info_attrs(Event, Event.get_info, event_info) -add_get_info_attrs(MemoryObjectHolder, MemoryObjectHolder.get_info, mem_info) -add_get_info_attrs(Image, 
Image.get_image_info, image_info) -add_get_info_attrs(Kernel, Kernel.get_info, kernel_info) -add_get_info_attrs(Sampler, Sampler.get_info, sampler_info) - -# }}} - - -if have_gl(): - def gl_object_get_gl_object(self): - return self.get_gl_object_info()[1] - - GLBuffer.gl_object = property(gl_object_get_gl_object) - GLTexture.gl_object = property(gl_object_get_gl_object) - -# vim: foldmethod=marker diff --git a/pyopencl/mempool.py b/pyopencl/mempool.py deleted file mode 100644 index 6b1740ec..00000000 --- a/pyopencl/mempool.py +++ /dev/null @@ -1,275 +0,0 @@ -from __future__ import division -from __future__ import absolute_import -import six - -__copyright__ = """ -Copyright (C) 2014 Andreas Kloeckner -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - -import numpy as np -import pyopencl as cl -from pyopencl.tools import bitlog2 - - -# {{{ allocators - -class AllocatorBase(object): - def __call__(self, nbytes): - try_count = 0 - - while try_count < 2: - try: - return self.allocate(nbytes) - except cl.Error as e: - if not e.is_out_of_memory(): - raise - try_count += 1 - if try_count == 2: - raise - - self.try_release_blocks() - - def try_release_blocks(self): - import gc - gc.collect() - - def free(self, buf): - buf.release() - - -class DeferredAllocator(AllocatorBase): - is_deferred = True - - def __init__(self, context, mem_flags=cl.mem_flags.READ_WRITE): - self.context = context - self.mem_flags = mem_flags - - def allocate(self, nbytes): - return cl.Buffer(self.context, self.mem_flags, nbytes) - - -_zero = np.array([0, 0, 0, 0], dtype=np.int8) - - -class ImmediateAllocator(AllocatorBase): - is_deferred = False - - def __init__(self, queue, mem_flags=cl.mem_flags.READ_WRITE): - self.context = queue.context - self.queue = queue - self.mem_flags = mem_flags - - def allocate(self, nbytes): - buf = cl.Buffer(self.context, self.mem_flags, nbytes) - - # Make sure the buffer gets allocated right here and right now. - # This looks (and is) expensive. But immediate allocators - # have their main use in memory pools, whose basic assumption - # is that allocation is too expensive anyway--but they rely - # on exact 'out-of-memory' information. - - from pyopencl.cffi_cl import _enqueue_write_buffer - _enqueue_write_buffer( - self.queue, buf, - _zero[:min(len(_zero), nbytes)], - is_blocking=False) - - # No need to wait for completion here. clWaitForEvents (e.g.) - # cannot return mem object allocation failures. This implies that - # the buffer is faulted onto the device on enqueue. 
- - return buf - -# }}} - - -# {{{ memory pool - -class MemoryPool(object): - mantissa_bits = 2 - mantissa_mask = (1 << mantissa_bits) - 1 - - def __init__(self, allocator): - self.allocator = allocator - - self.bin_nr_to_bin = {} - - if self.allocator.is_deferred: - from warnings import warn - warn("Memory pools expect non-deferred " - "semantics from their allocators. You passed a deferred " - "allocator, i.e. an allocator whose allocations can turn out to " - "be unavailable long after allocation.", statcklevel=2) - - self.active_blocks = 0 - - self.stop_holding_flag = False - - @classmethod - def bin_number(cls, size): - bl2 = bitlog2(size) - - mantissa_bits = cls.mantissa_bits - if bl2 >= mantissa_bits: - shifted = size >> (bl2 - mantissa_bits) - else: - shifted = size << (mantissa_bits - bl2) - - assert not (size and (shifted & (1 << mantissa_bits)) == 0) - - chopped = shifted & cls.mantissa_mask - - return bl2 << mantissa_bits | chopped - - @classmethod - def alloc_size(cls, bin_nr): - mantissa_bits = cls.mantissa_bits - - exponent = bin_nr >> mantissa_bits - mantissa = bin_nr & cls.mantissa_mask - - exp_minus_mbits = exponent-mantissa_bits - if exp_minus_mbits >= 0: - ones = (1 << exp_minus_mbits) - 1 - head = ((1 << mantissa_bits) | mantissa) << exp_minus_mbits - else: - ones = 0 - head = ((1 << mantissa_bits) | mantissa) >> -exp_minus_mbits - - assert not (ones & head) - return head | ones - - def stop_holding(self): - self.stop_holding_flag = True - self.free_held() - - def free_held(self): - for bin_nr, bin_list in six.iteritems(self.bin_nr_to_bin): - while bin_list: - self.allocator.free(bin_list.pop()) - - @property - def held_blocks(self): - return sum( - len(bin_list) - for bin_list in six.itervalues(self.bin_nr_to_bin)) - - def allocate(self, size): - bin_nr = self.bin_number(size) - bin_list = self.bin_nr_to_bin.setdefault(bin_nr, []) - - alloc_sz = self.alloc_size(bin_nr) - - if bin_list: - # if (m_trace) - # std::cout - # << "[pool] allocation 
of size " << size - # << " served from bin " << bin_nr - # << " which contained " << bin_list.size() - # << " entries" << std::endl; - self.active_blocks += 1 - return PooledBuffer(self, bin_list.pop(), alloc_sz) - - assert self.bin_number(alloc_sz) == bin_nr - - # if (m_trace) - # std::cout << "[pool] allocation of size " << size - # << " required new memory" << std::endl; - - try: - result = self.allocator(alloc_sz) - self.active_blocks += 1 - return PooledBuffer(self, result, alloc_sz) - except cl.MemoryError: - pass - - # if (m_trace) - # std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - self.allocator.try_release_blocks() - - if bin_list: - return bin_list.pop() - - # if (m_trace) - # std::cout << "[pool] allocation still OOM after GC" << std::endl; - - for _ in self._try_to_free_memory(): - try: - result = self.allocator(alloc_sz) - self.active_blocks += 1 - return PooledBuffer(self, result, alloc_sz) - except cl.MemoryError: - pass - - raise cl.MemoryError( - "failed to free memory for allocation", - routine="memory_pool::allocate", - code=cl.status_code.MEM_OBJECT_ALLOCATION_FAILURE) - - __call__ = allocate - - def free(self, buf, size): - self.active_blocks -= 1 - bin_nr = self.bin_number(size) - - if not self.stop_holding_flag: - self.bin_nr_to_bin.setdefault(bin_nr, []).append(buf) - - # if (m_trace) - # std::cout << "[pool] block of size " << size << " returned to bin " - # << bin_nr << " which now contains " << get_bin(bin_nr).size() - # << " entries" << std::endl; - else: - self.allocator.free(buf) - - def _try_to_free_memory(self): - for bin_nr, bin_list in six.iteritems(self.bin_nr_to_bin): - while bin_list: - self.allocator.free(bin_list.pop()) - yield - - -class PooledBuffer(cl.MemoryObjectHolder): - _id = 'buffer' - - def __init__(self, pool, buf, alloc_sz): - self.pool = pool - self.buf = buf - self.ptr = buf.ptr - self._alloc_sz = alloc_sz - - def release(self): - self.pool.free(self.buf, self._alloc_sz) - self.buf = 
None - self.ptr = None - - def __del__(self): - if self.buf is not None: - self.release() - -# }}} - - -# vim: foldmethod=marker diff --git a/pyopencl/tools.py b/pyopencl/tools.py index a3c577ef..05ccc5d0 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -35,7 +35,7 @@ import numpy as np from decorator import decorator import pyopencl as cl from pytools import memoize, memoize_method -from pyopencl.cffi_cl import _lib +from pyopencl._cl import bitlog2 # noqa: F401 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase import re @@ -60,9 +60,11 @@ _register_types() # {{{ imported names -bitlog2 = _lib.bitlog2 -from pyopencl.mempool import ( # noqa - PooledBuffer, DeferredAllocator, ImmediateAllocator, MemoryPool) +from pyopencl._cl import ( # noqa + PooledBuffer as PooledBuffer, + _tools_DeferredAllocator as DeferredAllocator, + _tools_ImmediateAllocator as ImmediateAllocator, + MemoryPool as MemoryPool) # }}} diff --git a/setup.py b/setup.py index 0fbe8c1d..b1b3cbdc 100644 --- a/setup.py +++ b/setup.py @@ -107,7 +107,7 @@ def main(): check_git_submodules, NumpyExtension) check_git_submodules() - hack_distutils() + hack_distutils(debug=True) conf = get_config(get_config_schema(), warn_about_no_config=False) @@ -221,7 +221,7 @@ def main(): packages=find_packages(), ext_modules=[ - NumpyExtension("_cl", + NumpyExtension("pyopencl._cl", [ "src/wrap_cl.cpp", "src/wrap_cl_part_1.cpp", diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index def89e6e..297186de 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -940,7 +940,6 @@ namespace pyopencl PYOPENCL_CALL_GUARDED(clRetainContext, (ctx)); } - ~context() { PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext, diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 318ba1a9..5c7bf70b 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -13,7 +13,7 @@ void pyopencl_expose_part_1(py::module &m) { typedef platform cls; - py::class_(m, "Platform") + py::class_(m, "Platform", 
py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_info) .def("get_devices", &cls::get_devices, py::arg("device_type")=CL_DEVICE_TYPE_ALL) @@ -29,7 +29,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ device { typedef device cls; - py::class_(m, "Device") + py::class_(m, "Device", py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) .def(py::self != py::self) @@ -50,12 +50,21 @@ void pyopencl_expose_part_1(py::module &m) { typedef context cls; - py::class_>(m, "Context") + py::class_>(m, "Context", py::dynamic_attr()) .def( py::init( [](py::object py_devices, py::object py_properties, - py::object py_dev_type) + py::object py_dev_type, py::object cache_dir) { + if (cache_dir.ptr() != Py_None) + { + py::object w = py::module::import("warnings"); + w.attr("warn")("The 'cache_dir' argument to the Context constructor " + "is deprecated and no longer has an effect. " + "It was removed because it only applied to the wrapper " + "object and not the context itself, leading to inconsistencies."); + } + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( return create_context_inner( py_devices, @@ -63,9 +72,10 @@ void pyopencl_expose_part_1(py::module &m) py_dev_type); ) }), - py::arg("devices")=py::object(), - py::arg("properties")=py::object(), - py::arg("dev_type")=py::object() + py::arg("devices")=py::none(), + py::arg("properties")=py::none(), + py::arg("dev_type")=py::none(), + py::arg("cache_dir")=py::none() ) .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) @@ -80,12 +90,12 @@ void pyopencl_expose_part_1(py::module &m) // {{{ command queue { typedef command_queue cls; - py::class_(m, "CommandQueue") + py::class_(m, "CommandQueue", py::dynamic_attr()) .def( py::init(), py::arg("context"), - py::arg("device")=py::object(), + py::arg("device")=py::none(), py::arg("properties")=0) .DEF_SIMPLE_METHOD(get_info) #if PYOPENCL_CL_VERSION < 0x1010 @@ -105,7 +115,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ events/synchronization { typedef event cls; - 
py::class_(m, "Event") + py::class_(m, "Event", py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_profiling_info) .DEF_SIMPLE_METHOD(wait) @@ -117,7 +127,7 @@ void pyopencl_expose_part_1(py::module &m) } { typedef nanny_event cls; - py::class_(m, "NannyEvent") + py::class_(m, "NannyEvent", py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_ward) ; } @@ -126,18 +136,18 @@ void pyopencl_expose_part_1(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1020 m.def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, - py::arg("queue"), py::arg("wait_for")=py::object() + py::arg("queue"), py::arg("wait_for")=py::none() ); #endif m.def("_enqueue_marker", enqueue_marker, py::arg("queue") ); m.def("_enqueue_wait_for_events", enqueue_wait_for_events, - py::arg("queue"), py::arg("wait_for")=py::object()); + py::arg("queue"), py::arg("wait_for")=py::none()); #if PYOPENCL_CL_VERSION >= 0x1020 m.def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, - py::arg("queue"), py::arg("wait_for")=py::object() + py::arg("queue"), py::arg("wait_for")=py::none() ); #endif m.def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); @@ -145,7 +155,7 @@ void pyopencl_expose_part_1(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1010 { typedef user_event cls; - py::class_(m, "UserEvent") + py::class_(m, "UserEvent", py::dynamic_attr()) .def(py::init( [](context &ctx) { @@ -163,7 +173,7 @@ void pyopencl_expose_part_1(py::module &m) { typedef memory_object_holder cls; - py::class_(m, "MemoryObjectHolder") + py::class_(m, "MemoryObjectHolder", py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_info) .def("get_host_array", get_mem_obj_host_array, py::arg("shape"), @@ -182,7 +192,7 @@ void pyopencl_expose_part_1(py::module &m) } { typedef memory_object cls; - py::class_(m, "MemoryObject") + py::class_(m, "MemoryObject", py::dynamic_attr()) .DEF_SIMPLE_METHOD(release) .def_property_readonly("hostbuf", &cls::hostbuf) @@ -200,7 +210,7 @@ void pyopencl_expose_part_1(py::module 
&m) py::arg("queue"), py::arg("mem_objects"), py::arg("flags")=0, - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); #endif @@ -209,7 +219,7 @@ void pyopencl_expose_part_1(py::module &m) py::arg("queue"), py::arg("mem_objects"), py::arg("flags")=0, - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); #endif // }}} @@ -217,7 +227,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ buffer { typedef buffer cls; - py::class_(m, "Buffer") + py::class_(m, "Buffer", py::dynamic_attr()) .def( py::init( [](context &ctx, cl_mem_flags flags, size_t size, py::object py_hostbuf) @@ -226,7 +236,7 @@ void pyopencl_expose_part_1(py::module &m) py::arg("context"), py::arg("flags"), py::arg("size")=0, - py::arg("hostbuf")=py::object() + py::arg("hostbuf")=py::none() ) #if PYOPENCL_CL_VERSION >= 0x1010 .def("get_sub_region", &cls::get_sub_region, @@ -249,7 +259,7 @@ void pyopencl_expose_part_1(py::module &m) py::arg("mem"), py::arg("hostbuf"), py::arg("device_offset")=0, - py::arg("wait_for")=py::object(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); m.def("_enqueue_write_buffer", enqueue_write_buffer, @@ -257,7 +267,7 @@ void pyopencl_expose_part_1(py::module &m) py::arg("mem"), py::arg("hostbuf"), py::arg("device_offset")=0, - py::arg("wait_for")=py::object(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); m.def("_enqueue_copy_buffer", enqueue_copy_buffer, @@ -267,7 +277,7 @@ void pyopencl_expose_part_1(py::module &m) py::arg("byte_count")=-1, py::arg("src_offset")=0, py::arg("dst_offset")=0, - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); // }}} @@ -282,9 +292,9 @@ void pyopencl_expose_part_1(py::module &m) py::arg("buffer_origin"), py::arg("host_origin"), py::arg("region"), - py::arg("buffer_pitches")=py::object(), - py::arg("host_pitches")=py::object(), - py::arg("wait_for")=py::object(), + py::arg("buffer_pitches")=py::none(), + py::arg("host_pitches")=py::none(), + 
py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); m.def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, @@ -294,9 +304,9 @@ void pyopencl_expose_part_1(py::module &m) py::arg("buffer_origin"), py::arg("host_origin"), py::arg("region"), - py::arg("buffer_pitches")=py::object(), - py::arg("host_pitches")=py::object(), - py::arg("wait_for")=py::object(), + py::arg("buffer_pitches")=py::none(), + py::arg("host_pitches")=py::none(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); m.def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, @@ -306,9 +316,9 @@ void pyopencl_expose_part_1(py::module &m) py::arg("src_origin"), py::arg("dst_origin"), py::arg("region"), - py::arg("src_pitches")=py::object(), - py::arg("dst_pitches")=py::object(), - py::arg("wait_for")=py::object() + py::arg("src_pitches")=py::none(), + py::arg("dst_pitches")=py::none(), + py::arg("wait_for")=py::none() ); #endif @@ -320,7 +330,7 @@ void pyopencl_expose_part_1(py::module &m) m.def("_enqueue_fill_buffer", enqueue_fill_buffer, py::arg("queue"), py::arg("mem"), py::arg("pattern"), py::arg("offset"), py::arg("size"), - py::arg("wait_for")=py::object()); + py::arg("wait_for")=py::none()); #endif } diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index e4360882..f51e8a7b 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -7,7 +7,7 @@ namespace pyopencl { #if PYOPENCL_CL_VERSION >= 0x1020 py::object image_desc_dummy_getter(cl_image_desc &desc) { - return py::object(); + return py::none(); } void image_desc_set_shape(cl_image_desc &desc, py::object py_shape) @@ -66,7 +66,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef image cls; - py::class_(m, "Image"/* , py::no_init */) + py::class_(m, "Image", py::dynamic_attr()) .def( py::init( []( @@ -82,9 +82,9 @@ void pyopencl_expose_part_2(py::module &m) py::arg("context"), py::arg("flags"), py::arg("format"), - py::arg("shape")=py::object(), - py::arg("pitches")=py::object(), - 
py::arg("hostbuf")=py::object() + py::arg("shape")=py::none(), + py::arg("pitches")=py::none(), + py::arg("hostbuf")=py::none() ) #if PYOPENCL_CL_VERSION >= 0x1020 .def( @@ -102,7 +102,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("flags"), py::arg("format"), py::arg("desc"), - py::arg("hostbuf")=py::object() + py::arg("hostbuf")=py::none() ) #endif .DEF_SIMPLE_METHOD(get_image_info) @@ -136,7 +136,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("hostbuf"), py::arg("row_pitch")=0, py::arg("slice_pitch")=0, - py::arg("wait_for")=py::object(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); m.def("_enqueue_write_image", enqueue_write_image, @@ -147,7 +147,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("hostbuf"), py::arg("row_pitch")=0, py::arg("slice_pitch")=0, - py::arg("wait_for")=py::object(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true ); @@ -158,7 +158,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("src_origin"), py::arg("dest_origin"), py::arg("region"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); m.def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, py::arg("queue"), @@ -167,7 +167,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("origin"), py::arg("region"), py::arg("offset"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); m.def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, py::arg("queue"), @@ -176,7 +176,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("offset"), py::arg("origin"), py::arg("region"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); #if PYOPENCL_CL_VERSION >= 0x1020 @@ -186,7 +186,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("color"), py::arg("origin"), py::arg("region"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); #endif @@ -195,10 +195,10 @@ void pyopencl_expose_part_2(py::module &m) // {{{ memory_map { typedef 
memory_map cls; - py::class_(m, "MemoryMap") + py::class_(m, "MemoryMap", py::dynamic_attr()) .def("release", &cls::release, py::arg("queue")=0, - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ) ; } @@ -211,8 +211,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("shape"), py::arg("dtype"), py::arg("order")="C", - py::arg("strides")=py::object(), - py::arg("wait_for")=py::object(), + py::arg("strides")=py::none(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true); m.def("enqueue_map_image", enqueue_map_image, py::arg("queue"), @@ -223,8 +223,8 @@ void pyopencl_expose_part_2(py::module &m) py::arg("shape"), py::arg("dtype"), py::arg("order")="C", - py::arg("strides")=py::object(), - py::arg("wait_for")=py::object(), + py::arg("strides")=py::none(), + py::arg("wait_for")=py::none(), py::arg("is_blocking")=true); // }}} @@ -232,7 +232,7 @@ void pyopencl_expose_part_2(py::module &m) // {{{ sampler { typedef sampler cls; - py::class_(m, "Sampler") + py::class_(m, "Sampler", py::dynamic_attr()) .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) @@ -253,7 +253,7 @@ void pyopencl_expose_part_2(py::module &m) .value("BINARY", cls::KND_BINARY) ; - py::class_(m, "_Program") + py::class_(m, "_Program", py::dynamic_attr()) .def( py::init( [](context &ctx, std::string const &src) @@ -285,17 +285,17 @@ void pyopencl_expose_part_2(py::module &m) .DEF_SIMPLE_METHOD(get_build_info) .def("_build", &cls::build, py::arg("options")="", - py::arg("devices")=py::object()) + py::arg("devices")=py::none()) #if PYOPENCL_CL_VERSION >= 0x1020 .def("compile", &cls::compile, py::arg("options")="", - py::arg("devices")=py::object(), + py::arg("devices")=py::none(), py::arg("headers")=py::list()) .def_static("link", &link_program, py::arg("context"), py::arg("programs"), py::arg("options")="", - py::arg("devices")=py::object() + py::arg("devices")=py::none() ) #endif .def(py::self == py::self) @@ -316,7 +316,7 @@ void 
pyopencl_expose_part_2(py::module &m) { typedef kernel cls; - py::class_(m, "Kernel") + py::class_(m, "Kernel", py::dynamic_attr()) .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_work_group_info) @@ -333,7 +333,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef local_memory cls; - py::class_(m, "LocalMemory") + py::class_(m, "LocalMemory", py::dynamic_attr()) .def( py::init(), py::arg("size")) @@ -347,14 +347,14 @@ void pyopencl_expose_part_2(py::module &m) py::arg("kernel"), py::arg("global_work_size"), py::arg("local_work_size"), - py::arg("global_work_offset")=py::object(), - py::arg("wait_for")=py::object(), + py::arg("global_work_offset")=py::none(), + py::arg("wait_for")=py::none(), py::arg("g_times_l")=false ); m.def("enqueue_task", enqueue_task, py::arg("queue"), py::arg("kernel"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); // TODO: clEnqueueNativeKernel @@ -371,7 +371,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef gl_buffer cls; - py::class_(m, "GLBuffer") + py::class_(m, "GLBuffer", py::dynamic_attr()) .def( py::init( [](context &ctx, cl_mem_flags flags, GLuint bufobj) @@ -387,7 +387,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef gl_renderbuffer cls; - py::class_(m, "GLRenderBuffer") + py::class_(m, "GLRenderBuffer", py::dynamic_attr()) .def( py::init( [](context &ctx, cl_mem_flags flags, GLuint bufobj) @@ -403,7 +403,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef gl_texture cls; - py::class_(m, "GLTexture") + py::class_(m, "GLTexture", py::dynamic_attr()) .def( py::init( [](context &ctx, cl_mem_flags flags, GLenum texture_target, @@ -425,19 +425,19 @@ void pyopencl_expose_part_2(py::module &m) m.def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, py::arg("queue"), py::arg("mem_objects"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); m.def("enqueue_release_gl_objects", enqueue_release_gl_objects, py::arg("queue"), 
py::arg("mem_objects"), - py::arg("wait_for")=py::object() + py::arg("wait_for")=py::none() ); #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) m.def("get_gl_context_info_khr", get_gl_context_info_khr, py::arg("properties"), py::arg("param_name"), - py::arg("platform")=py::object() + py::arg("platform")=py::none() ); #endif -- GitLab From bb34f9058d453d0507726ed4e3c23dcb1be6be4c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 9 Aug 2018 19:46:30 -0500 Subject: [PATCH 13/92] [pybind11] basic demo executes again --- pyopencl/__init__.py | 306 +++++------------------------------------ pyopencl/invoker.py | 30 ++-- src/wrap_cl_part_1.cpp | 1 + src/wrap_cl_part_2.cpp | 2 + 4 files changed, 50 insertions(+), 289 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index f8ba8ccb..10c33c73 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -195,10 +195,18 @@ from pyopencl._cl import ( # noqa import inspect as _inspect -CONSTANT_CLASSES = [ +CONSTANT_CLASSES = tuple( getattr(_cl, name) for name in dir(_cl) if _inspect.isclass(getattr(_cl, name)) - and name[0].islower() and name not in ["zip", "map", "range"]] + and name[0].islower() and name not in ["zip", "map", "range"]) + +_KERNEL_ARG_CLASSES = ( + MemoryObjectHolder, + Sampler, + LocalMemory, + # FIXME + # SVM, + ) if _cl.have_gl(): @@ -806,161 +814,18 @@ def _add_functionality(): def kernel__setup(self, prg): self._source = getattr(prg, "_source", None) - self._generate_naive_call() + from pyopencl.invoker import generate_enqueue_and_set_args + self._enqueue, self._set_args = generate_enqueue_and_set_args( + self.function_name, self.num_args, self.num_args, + None, + warn_about_arg_count_bug=None, + work_around_arg_count_bug=None) + self._wg_info_cache = {} return self - def kernel_get_work_group_info(self, param, device): - try: - return self._wg_info_cache[param, device] - except KeyError: - pass - - result = kernel_old_get_work_group_info(self, param, device) - 
self._wg_info_cache[param, device] = result - return result - - # {{{ code generation for __call__, set_args - - def kernel__set_set_args_body(self, body, num_passed_args): - from pytools.py_codegen import ( - PythonFunctionGenerator, - PythonCodeGenerator, - Indentation) - - arg_names = ["arg%d" % i for i in range(num_passed_args)] - - # {{{ wrap in error handler - - err_gen = PythonCodeGenerator() - - def gen_error_handler(): - err_gen(""" - if current_arg is not None: - args = [{args}] - advice = "" - from pyopencl.array import Array - if isinstance(args[current_arg], Array): - advice = " (perhaps you meant to pass 'array.data' " \ - "instead of the array itself?)" - - raise _cl.LogicError( - "when processing argument #%d (1-based): %s%s" - % (current_arg+1, str(e), advice)) - else: - raise - """ - .format(args=", ".join(arg_names))) - err_gen("") - - err_gen("try:") - with Indentation(err_gen): - err_gen.extend(body) - err_gen("except TypeError as e:") - with Indentation(err_gen): - gen_error_handler() - err_gen("except _cl.LogicError as e:") - with Indentation(err_gen): - gen_error_handler() - - # }}} - - def add_preamble(gen): - gen.add_to_preamble( - "import numpy as np") - gen.add_to_preamble( - "import pyopencl._cl as _cl") - gen.add_to_preamble("from pyopencl import status_code") - gen.add_to_preamble("from struct import pack") - gen.add_to_preamble("") - - # {{{ generate _enqueue - - gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name, - ["self", "queue", "global_size", "local_size"] - + arg_names - + ["global_offset=None", "g_times_l=None", "wait_for=None"]) - - add_preamble(gen) - gen.extend(err_gen) - - gen(""" - return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size, - global_offset, wait_for, g_times_l=g_times_l) - """) - - self._enqueue = gen.get_function() - - # }}} - - # {{{ generate set_args - - gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names) - - add_preamble(gen) - gen.extend(err_gen) - - 
self._set_args = gen.get_function() - - # }}} - - def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var): - # (TODO: still needed?) - - # from pytools.py_codegen import Indentation - # - # if _CPY2: - # # https://github.com/numpy/numpy/issues/5381 - # gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) - # with Indentation(gen): - # gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) - - gen(""" - kernel._set_arg_bytes({arg_idx}, {buf_var}) - """ - .format(arg_idx=arg_idx, buf_var=buf_var)) - - def kernel__generate_bytes_arg_setter(self, gen, arg_idx, buf_var): - gen(""" - self._set_arg_bytes({arg_idx}, {buf_var}) - """ - .format(arg_idx=arg_idx, buf_var=buf_var)) - - def kernel__generate_generic_arg_handler(self, gen, arg_idx, arg_var): - from pytools.py_codegen import Indentation - - gen(""" - if {arg_var} is None: - self._set_arg_null({arg_idx}) - elif isinstance({arg_var}, _CLKernelArg): - self.set_arg({arg_idx}, {arg_var}) - """ - .format(arg_idx=arg_idx, arg_var=arg_var)) - - gen("else:") - with Indentation(gen): - self._generate_buffer_arg_setter(gen, arg_idx, arg_var) - - def kernel__generate_naive_call(self): - num_args = self.num_args - - from pytools.py_codegen import PythonCodeGenerator - gen = PythonCodeGenerator() - - if num_args == 0: - gen("pass") - - for i in range(num_args): - gen("# process argument {arg_idx}".format(arg_idx=i)) - gen("") - gen("current_arg = {arg_idx}".format(arg_idx=i)) - self._generate_generic_arg_handler(gen, i, "arg%d" % i) - gen("") - - self._set_set_args_body(gen, num_args) - def kernel_set_scalar_arg_dtypes(self, scalar_arg_dtypes): - self._scalar_arg_dtypes = scalar_arg_dtypes + self._scalar_arg_dtypes = tuple(scalar_arg_dtypes) # {{{ arg counting bug handling @@ -974,7 +839,7 @@ def _add_functionality(): from pyopencl.characterize import has_struct_arg_count_bug count_bug_per_dev = [ - has_struct_arg_count_bug(dev) + has_struct_arg_count_bug(dev, self.context) for dev in 
self.context.devices] from pytools import single_valued @@ -984,119 +849,25 @@ def _add_functionality(): else: warn_about_arg_count_bug = True - fp_arg_count = 0 - # }}} - cl_arg_idx = 0 - - from pytools.py_codegen import PythonCodeGenerator - gen = PythonCodeGenerator() - - if not scalar_arg_dtypes: - gen("pass") - - for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes): - gen("# process argument {arg_idx}".format(arg_idx=arg_idx)) - gen("") - gen("current_arg = {arg_idx}".format(arg_idx=arg_idx)) - arg_var = "arg%d" % arg_idx - - if arg_dtype is None: - self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) - cl_arg_idx += 1 - gen("") - continue - - arg_dtype = np.dtype(arg_dtype) - - if arg_dtype.char == "V": - self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) - cl_arg_idx += 1 - - elif arg_dtype.kind == "c": - if warn_about_arg_count_bug: - warn("{knl_name}: arguments include complex numbers, and " - "some (but not all) of the target devices mishandle " - "struct kernel arguments (hence the workaround is " - "disabled".format( - knl_name=self.function_name, stacklevel=2)) - - if arg_dtype == np.complex64: - arg_char = "f" - elif arg_dtype == np.complex128: - arg_char = "d" - else: - raise TypeError("unexpected complex type: %s" % arg_dtype) - - if (work_around_arg_count_bug == "pocl" - and arg_dtype == np.complex128 - and fp_arg_count + 2 <= 8): - gen( - "buf = pack('{arg_char}', {arg_var}.real)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - gen("current_arg = current_arg + 1000") - gen( - "buf = pack('{arg_char}', {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - elif (work_around_arg_count_bug == "apple" - and arg_dtype == np.complex128 - and fp_arg_count + 2 <= 8): - raise NotImplementedError("No work-around to " - "Apple's broken structs-as-kernel arg " - "handling has 
been found. " - "Cannot pass complex numbers to kernels.") - - else: - gen( - "buf = pack('{arg_char}{arg_char}', " - "{arg_var}.real, {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - fp_arg_count += 2 - - elif arg_dtype.char in "IL" and _CPY26: - # Prevent SystemError: ../Objects/longobject.c:336: bad - # argument to internal function - - gen( - "buf = pack('{arg_char}', long({arg_var}))" - .format(arg_char=arg_dtype.char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - else: - if arg_dtype.kind == "f": - fp_arg_count += 1 - - arg_char = arg_dtype.char - arg_char = _type_char_map.get(arg_char, arg_char) - gen( - "buf = pack('{arg_char}', {arg_var})" - .format( - arg_char=arg_char, - arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - gen("") - - if cl_arg_idx != self.num_args: - raise TypeError( - "length of argument list (%d) and " - "CL-generated number of arguments (%d) do not agree" - % (cl_arg_idx, self.num_args)) + from pyopencl.invoker import generate_enqueue_and_set_args + self._enqueue, self._set_args = generate_enqueue_and_set_args( + self.function_name, + len(scalar_arg_dtypes), self.num_args, + self._scalar_arg_dtypes, + warn_about_arg_count_bug=warn_about_arg_count_bug, + work_around_arg_count_bug=work_around_arg_count_bug) - self._set_set_args_body(gen, len(scalar_arg_dtypes)) + def kernel_get_work_group_info(self, param, device): + try: + return self._wg_info_cache[param, device] + except KeyError: + pass - # }}} + result = kernel_old_get_work_group_info(self, param, device) + self._wg_info_cache[param, device] = result + return result def kernel_set_args(self, *args, **kwargs): # Need to dupicate the 'self' argument for dynamically generated method @@ -1116,11 +887,6 @@ def _add_functionality(): Kernel.__init__ = kernel_init Kernel._setup = kernel__setup 
Kernel.get_work_group_info = kernel_get_work_group_info - Kernel._set_set_args_body = kernel__set_set_args_body - Kernel._generate_bufprot_arg_setter = kernel__generate_bufprot_arg_setter - Kernel._generate_bytes_arg_setter = kernel__generate_bytes_arg_setter - Kernel._generate_generic_arg_handler = kernel__generate_generic_arg_handler - Kernel._generate_naive_call = kernel__generate_naive_call Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes Kernel.set_args = kernel_set_args Kernel.__call__ = kernel_call @@ -1673,7 +1439,9 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif isinstance(dest, SVM): + # FIXME + # elif isinstance(dest, SVM): + elif 0: # to SVM if isinstance(src, SVM): src = src.mem diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py index 8cad3f25..7fad942c 100644 --- a/pyopencl/invoker.py +++ b/pyopencl/invoker.py @@ -28,7 +28,7 @@ import sys import numpy as np from warnings import warn -from pyopencl._cffi import ffi as _ffi +import pyopencl._cl as _cl from pytools.persistent_dict import WriteOncePersistentDict from pyopencl.tools import _NumpyTypesKeyBuilder @@ -44,7 +44,7 @@ _size_t_char = ({ 4: 'L', 2: 'H', 1: 'B', -})[_ffi.sizeof('size_t')] +})[_cl._sizeof_size_t()] _type_char_map = { 'n': _size_t_char.lower(), 'N': _size_t_char @@ -66,20 +66,14 @@ def generate_buffer_arg_setter(gen, arg_idx, buf_var): gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) gen(""" - c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var}) - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz) - if status != _ffi.NULL: - _handle_error(status) + self._set_arg_buf({arg_idx}, {buf_var}) """ .format(arg_idx=arg_idx, buf_var=buf_var)) def generate_bytes_arg_setter(gen, arg_idx, buf_var): gen(""" - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, - {buf_var}, len({buf_var})) - if status != _ffi.NULL: - _handle_error(status) + self._set_arg_buf({arg_idx}, {buf_var}) """ 
.format(arg_idx=arg_idx, buf_var=buf_var)) @@ -89,11 +83,9 @@ def generate_generic_arg_handler(gen, arg_idx, arg_var): gen(""" if {arg_var} is None: - status = _lib.kernel__set_arg_null(self.ptr, {arg_idx}) - if status != _ffi.NULL: - _handle_error(status) - elif isinstance({arg_var}, _cl._CLKernelArg): - self._set_arg_clkernelarg({arg_idx}, {arg_var}) + self._set_arg_null({arg_idx}) + elif isinstance({arg_var}, _KERNEL_ARG_CLASSES): + self.set_arg({arg_idx}, {arg_var}) """ .format(arg_idx=arg_idx, arg_var=arg_var)) @@ -289,10 +281,8 @@ def wrap_in_error_handler(body, arg_names): def add_local_imports(gen): gen("import numpy as np") - gen("import pyopencl.cffi_cl as _cl") - gen( - "from pyopencl.cffi_cl import _lib, " - "_ffi, _handle_error, _CLKernelArg") + gen("import pyopencl._cl as _cl") + gen("from pyopencl import _KERNEL_ARG_CLASSES") gen("") @@ -359,7 +349,7 @@ def _generate_enqueue_and_set_args_module(function_name, invoker_cache = WriteOncePersistentDict( - "pyopencl-invoker-cache-v1", + "pyopencl-invoker-cache-v4", key_builder=_NumpyTypesKeyBuilder()) diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 5c7bf70b..45633bd2 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -7,6 +7,7 @@ using namespace pyopencl; void pyopencl_expose_part_1(py::module &m) { m.def("get_cl_header_version", get_cl_header_version); + m.def("_sizeof_size_t", [](){ return sizeof(size_t); }); // {{{ platform DEF_SIMPLE_FUNCTION(get_platforms); diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index f51e8a7b..13670472 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -320,6 +320,8 @@ void pyopencl_expose_part_2(py::module &m) .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .DEF_SIMPLE_METHOD(get_work_group_info) + .def("_set_arg_null", &cls::set_arg_null) + .def("_set_arg_buf", &cls::set_arg_buf) .DEF_SIMPLE_METHOD(set_arg) #if PYOPENCL_CL_VERSION >= 0x1020 .DEF_SIMPLE_METHOD(get_arg_info) -- GitLab From 
20df1a1399eb0c885bce6a471b398b5723394f5b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 00:32:10 -0500 Subject: [PATCH 14/92] Fix de-boostified mempool --- src/mempool.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mempool.hpp b/src/mempool.hpp index 2f649e2b..b24fcb02 100644 --- a/src/mempool.hpp +++ b/src/mempool.hpp @@ -41,7 +41,7 @@ namespace PYGPU_PACKAGE template - class memory_pool + class memory_pool : noncopyable { public: typedef typename Allocator::pointer_type pointer_type; @@ -239,7 +239,7 @@ namespace PYGPU_PACKAGE void free_held() { - for (bin_pair_t bin_pair: m_container) + for (bin_pair_t &bin_pair: m_container) { bin_t &bin = bin_pair.second; @@ -270,7 +270,7 @@ namespace PYGPU_PACKAGE bool try_to_free_memory() { // free largest stuff first - for (bin_pair_t bin_pair: reverse(m_container)) + for (bin_pair_t &bin_pair: reverse(m_container)) { bin_t &bin = bin_pair.second; -- GitLab From efafaff0dee2c551d474470966d5cb3d3f32bcb9 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 00:33:20 -0500 Subject: [PATCH 15/92] Minor test suite fixes --- test/test_array.py | 3 ++- test/test_wrapper.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_array.py b/test/test_array.py index bca78f5c..bfdd10e6 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -37,10 +37,11 @@ import pyopencl.tools as cl_tools from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) from pyopencl.characterize import has_double_support, has_struct_arg_count_bug -from pyopencl.cffi_cl import _PYPY from pyopencl.clrandom import RanluxGenerator, PhiloxGenerator, ThreefryGenerator +_PYPY = cl._PYPY + # {{{ helpers diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 118eee74..efb86006 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -295,7 +295,7 @@ def test_image_format_constructor(): assert 
iform.channel_order == cl.channel_order.RGBA assert iform.channel_data_type == cl.channel_type.FLOAT - assert not iform.__dict__ + assert not hasattr(iform, "__dict__") def test_device_topology_amd_constructor(): -- GitLab From b7c19c1348c5bdac784524fd08e075ee00a5f5ad Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 14:23:13 -0500 Subject: [PATCH 16/92] Bump version --- pyopencl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyopencl/version.py b/pyopencl/version.py index ddb2bc14..f46939df 100644 --- a/pyopencl/version.py +++ b/pyopencl/version.py @@ -1,3 +1,3 @@ -VERSION = (2018, 1, 1) +VERSION = (2018, 2) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS -- GitLab From cfa09d47cdd7fbd13e644019435a5f3567eb3e79 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 14:23:47 -0500 Subject: [PATCH 17/92] [pybind] Fix handle_from_new_ptr to actually take ownership --- src/wrap_helpers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index 1384d79a..1635a684 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -127,7 +127,7 @@ namespace template inline py::object handle_from_new_ptr(T *ptr) { - return py::cast(ptr); + return py::cast(ptr, py::return_value_policy::take_ownership); } template -- GitLab From c1c5af0932aaaaa33fd0bf3e2c1cfb97c2b6db57 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 14:24:27 -0500 Subject: [PATCH 18/92] [pybind] Bring back custom ext header --- src/pyopencl_ext.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ src/wrap_cl.hpp | 2 +- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 src/pyopencl_ext.h diff --git a/src/pyopencl_ext.h b/src/pyopencl_ext.h new file mode 100644 index 00000000..a9792d8b --- /dev/null +++ b/src/pyopencl_ext.h @@ -0,0 +1,58 @@ +#ifndef _PYOPENCL_EXT_H +#define _PYOPENCL_EXT_H + +#ifdef 
PYOPENCL_USE_SHIPPED_EXT + +#include "clinfo_ext.h" + +#else + +#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) + +#include + +#else + +#include +#include + +#endif + +#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; +#endif + +/* {{{ these NV defines are often missing from the system headers */ + +#ifndef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#endif +#ifndef CL_DEVICE_INTEGRATED_MEMORY_NV +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +#endif + +#ifndef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV +#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 +#endif + +#ifndef CL_DEVICE_PCI_BUS_ID_NV +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#endif + +#ifndef CL_DEVICE_PCI_SLOT_ID_NV +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#endif + +/* }}} */ + +#endif + +#endif + +/* vim: foldmethod=marker */ diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 297186de..9488a715 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -28,7 +28,7 @@ #define CL_TARGET_OPENCL_VERSION 220 #include -#include +#include "pyopencl_ext.h" #if defined(_WIN32) #define NOMINMAX -- GitLab From ea285415507a49aa4ef8e0250a9000b31324456c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 14:24:41 -0500 Subject: [PATCH 19/92] [pybind] Fix a few mis-wrapped Nones --- src/wrap_cl.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 9488a715..98b8966b 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -282,7 +282,7 @@ return py::object(handle_from_new_ptr( \ new TYPE(param_value, /*retain*/ true))); \ else \ - return py::object(); \ + return py::none(); \ } #define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ 
@@ -1376,7 +1376,7 @@ namespace pyopencl return py::reinterpret_borrow(m_ward->m_buf.obj); } else - return py::object(); + return py::none(); } virtual void wait() @@ -1413,7 +1413,7 @@ namespace pyopencl virtual void wait() { event::wait(); - m_ward = py::object(); + m_ward = py::none(); } }; #endif @@ -1628,7 +1628,7 @@ namespace pyopencl if (m_hostbuf.get()) return py::reinterpret_borrow(m_hostbuf->m_buf.obj); else - return py::object(); + return py::none(); #else return m_hostbuf; #endif @@ -2290,7 +2290,7 @@ namespace pyopencl if (param_value == 0) { // no associated memory object? no problem. - return py::object(); + return py::none(); } return create_mem_object_wrapper(param_value); @@ -2870,7 +2870,7 @@ namespace pyopencl ~memory_map() { if (m_valid) - delete release(0, py::object()); + delete release(0, py::none()); } event *release(command_queue *cq, py::object py_wait_for) @@ -4218,7 +4218,7 @@ namespace pyopencl if (param_value == 0) { // no associated memory object? no problem. - return py::object(); + return py::none(); } return create_mem_object_wrapper(param_value); -- GitLab From 24d4ea3311f7a1ad8ac2aee576a5a5260196d97c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:27:17 -0500 Subject: [PATCH 20/92] [pybind] Remove extraneous fixme --- pyopencl/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 10c33c73..6c92c44b 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -188,9 +188,6 @@ from pyopencl._cl import ( # noqa GLTexture, # FIXME # DeviceTopologyAmd, - - # FIXME? 
- # add_get_info_attrs as _add_get_info_attrs, ) import inspect as _inspect -- GitLab From a9a14a441a663dde289813c55bc18fb95119b8b3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:28:06 -0500 Subject: [PATCH 21/92] Remove deprecated copy aliases --- pyopencl/__init__.py | 39 --------------------------------------- test/test_wrapper.py | 8 ++++---- 2 files changed, 4 insertions(+), 43 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 6c92c44b..710f99a1 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1223,45 +1223,6 @@ _csc = create_some_context # {{{ enqueue_copy -def _mark_copy_deprecated(func): - def new_func(*args, **kwargs): - from warnings import warn - warn("'%s' has been deprecated in version 2011.1. Please use " - "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning, - stacklevel=2) - return func(*args, **kwargs) - - try: - from functools import update_wrapper - except ImportError: - pass - else: - try: - update_wrapper(new_func, func) - except AttributeError: - pass - - return new_func - - -enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image) -enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image) -enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image) -enqueue_copy_image_to_buffer = _mark_copy_deprecated( - _cl._enqueue_copy_image_to_buffer) -enqueue_copy_buffer_to_image = _mark_copy_deprecated( - _cl._enqueue_copy_buffer_to_image) -enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer) -enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer) -enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer) - - -if _cl.get_cl_header_version() >= (1, 1): - enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect) - enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect) - enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect) 
- - def enqueue_copy(queue, dest, src, **kwargs): """Copy from :class:`Image`, :class:`Buffer` or the host to :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host diff --git a/test/test_wrapper.py b/test/test_wrapper.py index efb86006..03418480 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -351,7 +351,7 @@ def test_that_python_args_fail(ctx_factory): prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3)) a_result = np.empty_like(a) - cl.enqueue_read_buffer(queue, a_buf, a_result).wait() + cl.enqueue_copy(queue, a_buf, a_result).wait() def test_image_2d(ctx_factory): @@ -513,8 +513,8 @@ def test_copy_buffer(ctx_factory): buf1 = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) buf2 = cl.Buffer(context, mf.WRITE_ONLY, b.nbytes) - cl.enqueue_copy_buffer(queue, buf1, buf2).wait() - cl.enqueue_read_buffer(queue, buf2, b).wait() + cl.enqueue_copy(queue, buf2, buf1).wait() + cl.enqueue_copy(queue, b, buf2).wait() assert la.norm(a - b) == 0 @@ -569,7 +569,7 @@ def test_vector_args(ctx_factory): prg.set_vec(queue, dest.shape, None, x, dest_buf) - cl.enqueue_read_buffer(queue, dest_buf, dest).wait() + cl.enqueue_copy(queue, dest, dest_buf).wait() assert (dest == x).all() -- GitLab From 07969bc5fb9f0c05bbdffa72e8c9750932b60f02 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:28:27 -0500 Subject: [PATCH 22/92] [pybind] Remove extraneous fixme --- pyopencl/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 710f99a1..a5c28d92 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -557,9 +557,6 @@ class Program(object): return hash(self._get_prg()) -# _add_get_info_attrs(Program, Program.get_info, program_info) - - def create_program_with_built_in_kernels(context, devices, kernel_names): if not isinstance(kernel_names, str): kernel_names = ":".join(kernel_names) -- GitLab From 359d17f53ade28617b79386219134d458e6989ec Mon Sep 17 
00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:29:58 -0500 Subject: [PATCH 23/92] Move get_info attr bits down to allow overriding --- pyopencl/__init__.py | 141 ++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 69 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index a5c28d92..46e5266e 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -575,75 +575,6 @@ def link_program(context, programs, options=[], devices=None): # {{{ monkeypatch C++ wrappers to add functionality def _add_functionality(): - cls_to_info_cls = { - _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []), - _cl.Device: (_cl.Device.get_info, _cl.device_info, - ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]), - _cl.Context: (_cl.Context.get_info, _cl.context_info, []), - _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info, - ["CONTEXT", "DEVICE"]), - _cl.Event: (_cl.Event.get_info, _cl.event_info, []), - _cl.MemoryObjectHolder: - (MemoryObjectHolder.get_info, _cl.mem_info, []), - Image: (_cl.Image.get_image_info, _cl.image_info, []), - Program: (Program.get_info, _cl.program_info, []), - Kernel: (Kernel.get_info, _cl.kernel_info, []), - _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []), - } - - def to_string(cls, value, default_format=None): - for name in dir(cls): - if (not name.startswith("_") and getattr(cls, name) == value): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - for cls in CONSTANT_CLASSES: - cls.to_string = classmethod(to_string) - - # {{{ get_info attributes ------------------------------------------------- - - def make_getinfo(info_method, info_name, info_attr): - def result(self): - return info_method(self, info_attr) - - return property(result) - - def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): - def result(self): 
- try: - return getattr(self, cache_attr) - except AttributeError: - pass - - result = info_method(self, info_attr) - setattr(self, cache_attr, result) - return result - - return property(result) - - for cls, (info_method, info_class, cacheable_attrs) \ - in six.iteritems(cls_to_info_cls): - for info_name, info_value in six.iteritems(info_class.__dict__): - if info_name == "to_string" or info_name.startswith("_"): - continue - - info_lower = info_name.lower() - info_constant = getattr(info_class, info_name) - if info_name in cacheable_attrs: - cache_attr = intern("_info_cache_"+info_lower) - setattr(cls, info_lower, make_cacheable_getinfo( - info_method, info_lower, cache_attr, info_constant)) - else: - setattr(cls, info_lower, make_getinfo( - info_method, info_name, info_constant)) - - # }}} - # {{{ Platform def platform_repr(self): @@ -1070,6 +1001,78 @@ def _add_functionality(): # }}} + # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs + # to be visible through the attributes. So get_info attr creation needs to happen + # after the overriding is complete. 
+ cls_to_info_cls = { + _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []), + _cl.Device: (_cl.Device.get_info, _cl.device_info, + ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]), + _cl.Context: (_cl.Context.get_info, _cl.context_info, []), + _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info, + ["CONTEXT", "DEVICE"]), + _cl.Event: (_cl.Event.get_info, _cl.event_info, []), + _cl.MemoryObjectHolder: + (MemoryObjectHolder.get_info, _cl.mem_info, []), + Image: (_cl.Image.get_image_info, _cl.image_info, []), + Program: (Program.get_info, _cl.program_info, []), + Kernel: (Kernel.get_info, _cl.kernel_info, []), + _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []), + } + + def to_string(cls, value, default_format=None): + for name in dir(cls): + if (not name.startswith("_") and getattr(cls, name) == value): + return name + + if default_format is None: + raise ValueError("a name for value %d was not found in %s" + % (value, cls.__name__)) + else: + return default_format % value + + for cls in CONSTANT_CLASSES: + cls.to_string = classmethod(to_string) + + # {{{ get_info attributes ------------------------------------------------- + + def make_getinfo(info_method, info_name, info_attr): + def result(self): + return info_method(self, info_attr) + + return property(result) + + def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): + def result(self): + try: + return getattr(self, cache_attr) + except AttributeError: + pass + + result = info_method(self, info_attr) + setattr(self, cache_attr, result) + return result + + return property(result) + + for cls, (info_method, info_class, cacheable_attrs) \ + in six.iteritems(cls_to_info_cls): + for info_name, info_value in six.iteritems(info_class.__dict__): + if info_name == "to_string" or info_name.startswith("_"): + continue + + info_lower = info_name.lower() + info_constant = getattr(info_class, info_name) + if info_name in cacheable_attrs: + cache_attr = 
intern("_info_cache_"+info_lower) + setattr(cls, info_lower, make_cacheable_getinfo( + info_method, info_lower, cache_attr, info_constant)) + else: + setattr(cls, info_lower, make_getinfo( + info_method, info_name, info_constant)) + + # }}} + if _cl.have_gl(): def gl_object_get_gl_object(self): return self.get_gl_object_info()[1] -- GitLab From fa64dcaf056ea16b2d7aadc0449a57589a0a5453 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:30:25 -0500 Subject: [PATCH 24/92] [pybind] Revive compiler diagnostics support --- pyopencl/__init__.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 46e5266e..15ec5b1b 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -221,9 +221,27 @@ if _cl.have_gl(): pass +# {{{ diagnostics + class _ErrorRecord(Record): pass +class CompilerWarning(UserWarning): + pass + + +def compiler_output(text): + import os + from warnings import warn + if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): + warn(text, CompilerWarning) + else: + warn("Non-empty compiler output encountered. 
Set the " + "environment variable PYOPENCL_COMPILER_OUTPUT=1 " + "to see more.", CompilerWarning) + +# }}} + # {{{ find pyopencl shipped source code -- GitLab From ae9c7173101f7a84ddc4efa7223b08bfcf7f7939 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:30:49 -0500 Subject: [PATCH 25/92] Remove debug=True compile option --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b1b3cbdc..b8b44f40 100644 --- a/setup.py +++ b/setup.py @@ -107,7 +107,7 @@ def main(): check_git_submodules, NumpyExtension) check_git_submodules() - hack_distutils(debug=True) + hack_distutils() conf = get_config(get_config_schema(), warn_about_no_config=False) -- GitLab From 690c2a8fde31fd0f2cd0f1b10bdc96a8951735bf Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:31:46 -0500 Subject: [PATCH 26/92] [pybind] Move cache_dir deprecation Context constructor arg deprecation to Python --- pyopencl/__init__.py | 13 +++++++++++++ src/wrap_cl_part_1.cpp | 14 ++------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 15ec5b1b..725cf1a1 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -632,6 +632,19 @@ def _add_functionality(): # {{{ Context + context_old_init = Context.__init__ + + def context_init(self, devices, properties, dev_type, cache_dir=None): + if cache_dir is not None: + from warnings import warn + warn("The 'cache_dir' argument to the Context constructor " + "is deprecated and no longer has an effect. 
" + "It was removed because it only applied to the wrapper " + "object and not the context itself, leading to inconsistencies.", + DeprecationWarning, stacklevel=2) + + context_old_init(self, devices, properties, dev_type) + def context_repr(self): return "" % (self.int_ptr, ", ".join(repr(dev) for dev in self.devices)) diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 45633bd2..23a0bf10 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -55,17 +55,8 @@ void pyopencl_expose_part_1(py::module &m) .def( py::init( [](py::object py_devices, py::object py_properties, - py::object py_dev_type, py::object cache_dir) + py::object py_dev_type) { - if (cache_dir.ptr() != Py_None) - { - py::object w = py::module::import("warnings"); - w.attr("warn")("The 'cache_dir' argument to the Context constructor " - "is deprecated and no longer has an effect. " - "It was removed because it only applied to the wrapper " - "object and not the context itself, leading to inconsistencies."); - } - PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( return create_context_inner( py_devices, @@ -75,8 +66,7 @@ void pyopencl_expose_part_1(py::module &m) }), py::arg("devices")=py::none(), py::arg("properties")=py::none(), - py::arg("dev_type")=py::none(), - py::arg("cache_dir")=py::none() + py::arg("dev_type")=py::none() ) .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) -- GitLab From 4b4f3c232b32e872aac755f64414ce3a78d91e7f Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:32:20 -0500 Subject: [PATCH 27/92] [pybind] Fix stray cffi import in cache --- pyopencl/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 670134e8..48b6270e 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -374,7 +374,7 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes, if log is not None and log.strip()) if message: - from pyopencl.cffi_cl import compiler_output + from pyopencl 
import compiler_output compiler_output( "Built kernel retrieved from cache. Original from-source " "build had warnings:\n"+message) -- GitLab From 2e3f5fe9c738d7c66ec61134f3c730bbbb891176 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:32:48 -0500 Subject: [PATCH 28/92] Wrap kernel.program get_info to make sure a Program (not a _Program) gets returned --- pyopencl/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 725cf1a1..83431e5e 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -757,6 +757,7 @@ def _add_functionality(): # {{{ Kernel kernel_old_init = Kernel.__init__ + kernel_old_get_info = Kernel.get_info kernel_old_get_work_group_info = Kernel.get_work_group_info def kernel_init(self, prg, name): @@ -840,6 +841,14 @@ def _add_functionality(): capture_kernel_call(self, filename, queue, global_size, local_size, *args, **kwargs) + def kernel_get_info(self, param_name): + val = kernel_old_get_info(self, param_name) + + if isinstance(val, _Program): + return Program(val) + else: + return val + Kernel.__init__ = kernel_init Kernel._setup = kernel__setup Kernel.get_work_group_info = kernel_get_work_group_info @@ -847,6 +856,7 @@ def _add_functionality(): Kernel.set_args = kernel_set_args Kernel.__call__ = kernel_call Kernel.capture_call = kernel_capture_call + Kernel.get_info = kernel_get_info # }}} -- GitLab From 7cdeeaa9d4da050009435c6aa213a5c75a84138b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:33:36 -0500 Subject: [PATCH 29/92] [pybind] Use pybind-native GIL management --- src/wrap_cl.hpp | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 98b8966b..bccfed61 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -222,9 +222,10 @@ { \ PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ cl_int status_code; \ - 
Py_BEGIN_ALLOW_THREADS \ + { \ + py::gil_scoped_release release; \ status_code = NAME ARGLIST; \ - Py_END_ALLOW_THREADS \ + } \ if (status_code != CL_SUCCESS) \ throw pyopencl::error(#NAME, status_code);\ } @@ -242,9 +243,10 @@ { \ PYOPENCL_PRINT_CALL_TRACE(#NAME); \ cl_int status_code; \ - Py_BEGIN_ALLOW_THREADS \ + { \ + py::gil_scoped_release release; \ status_code = NAME ARGLIST; \ - Py_END_ALLOW_THREADS \ + } \ if (status_code != CL_SUCCESS) \ throw pyopencl::error(#NAME, status_code);\ } @@ -2923,14 +2925,15 @@ namespace pyopencl PYOPENCL_RETRY_IF_MEM_ERROR( { - Py_BEGIN_ALLOW_THREADS - mapped = clEnqueueMapBuffer( - cq.data(), buf.data(), - PYOPENCL_CAST_BOOL(is_blocking), flags, - offset, size_in_bytes, - PYOPENCL_WAITLIST_ARGS, &evt, - &status_code); - Py_END_ALLOW_THREADS + { + py::gil_scoped_release release; + mapped = clEnqueueMapBuffer( + cq->data(), buf.data(), + PYOPENCL_CAST_BOOL(is_blocking), flags, + offset, size_in_bytes, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + } if (status_code != CL_SUCCESS) throw pyopencl::error("clEnqueueMapBuffer", status_code); } ); @@ -2997,14 +3000,15 @@ namespace pyopencl void *mapped; PYOPENCL_RETRY_IF_MEM_ERROR( { - Py_BEGIN_ALLOW_THREADS - mapped = clEnqueueMapImage( - cq.data(), img.data(), - PYOPENCL_CAST_BOOL(is_blocking), flags, - origin, region, &row_pitch, &slice_pitch, - PYOPENCL_WAITLIST_ARGS, &evt, - &status_code); - Py_END_ALLOW_THREADS + { + py::gil_scoped_release release; + mapped = clEnqueueMapImage( + cq->data(), img.data(), + PYOPENCL_CAST_BOOL(is_blocking), flags, + origin, region, &row_pitch, &slice_pitch, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + } if (status_code != CL_SUCCESS) throw pyopencl::error("clEnqueueMapImage", status_code); } ); -- GitLab From 1f2cd48a499584cb98596621ccb3290e38b9ccf0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:33:55 -0500 Subject: [PATCH 30/92] [pybind] misc wrapper fixes --- src/wrap_cl.hpp | 14 +++++++------- 
src/wrap_cl_part_1.cpp | 2 +- src/wrap_cl_part_2.cpp | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index bccfed61..09fa3188 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -2859,12 +2859,12 @@ namespace pyopencl { private: bool m_valid; - command_queue m_queue; + std::shared_ptr m_queue; memory_object m_mem; void *m_ptr; public: - memory_map(command_queue &cq, memory_object const &mem, void *ptr) + memory_map(std::shared_ptr cq, memory_object const &mem, void *ptr) : m_valid(true), m_queue(cq), m_mem(mem), m_ptr(ptr) { } @@ -2880,7 +2880,7 @@ namespace pyopencl PYOPENCL_PARSE_WAIT_FOR; if (cq == 0) - cq = &m_queue; + cq = m_queue.get(); cl_event evt; PYOPENCL_CALL_GUARDED(clEnqueueUnmapMemObject, ( @@ -2899,7 +2899,7 @@ namespace pyopencl inline py::object enqueue_map_buffer( - command_queue &cq, + std::shared_ptr cq, memory_object_holder &buf, cl_map_flags flags, size_t offset, @@ -2959,7 +2959,7 @@ namespace pyopencl catch (...) { PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( - cq.data(), buf.data(), mapped, 0, 0, 0)); + cq->data(), buf.data(), mapped, 0, 0, 0)); throw; } @@ -2977,7 +2977,7 @@ namespace pyopencl inline py::object enqueue_map_image( - command_queue &cq, + std::shared_ptr cq, memory_object_holder &img, cl_map_flags flags, py::object py_origin, @@ -3023,7 +3023,7 @@ namespace pyopencl catch (...) 
{ PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( - cq.data(), img.data(), mapped, 0, 0, 0)); + cq->data(), img.data(), mapped, 0, 0, 0)); throw; } diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 23a0bf10..82e231d7 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -81,7 +81,7 @@ void pyopencl_expose_part_1(py::module &m) // {{{ command queue { typedef command_queue cls; - py::class_(m, "CommandQueue", py::dynamic_attr()) + py::class_>(m, "CommandQueue", py::dynamic_attr()) .def( py::init(), diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 13670472..c5b5e612 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -53,6 +53,7 @@ void pyopencl_expose_part_2(py::module &m) { typedef cl_image_desc cls; py::class_(m, "ImageDescriptor") + .def(py::init<>()) .def_readwrite("image_type", &cls::image_type) .def_property("shape", &image_desc_dummy_getter, image_desc_set_shape) .def_readwrite("array_size", &cls::image_array_size) @@ -197,7 +198,7 @@ void pyopencl_expose_part_2(py::module &m) typedef memory_map cls; py::class_(m, "MemoryMap", py::dynamic_attr()) .def("release", &cls::release, - py::arg("queue")=0, + py::arg("queue").none(true)=nullptr, py::arg("wait_for")=py::none() ) ; -- GitLab From 1e4cf2525cb5de054a6ea198be224ee28f2cba31 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 15:35:19 -0500 Subject: [PATCH 31/92] Flake8 spacing fixes --- pyopencl/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 83431e5e..f0178c97 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -226,6 +226,7 @@ if _cl.have_gl(): class _ErrorRecord(Record): pass + class CompilerWarning(UserWarning): pass @@ -1121,6 +1122,7 @@ def _add_functionality(): GLBuffer.gl_object = property(gl_object_get_gl_object) GLTexture.gl_object = property(gl_object_get_gl_object) + _add_functionality() # }}} -- GitLab From 
6ca59dca1ae44fa6cafedaea3d193376a378689c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 16:41:01 -0500 Subject: [PATCH 32/92] Remove enqueue_task --- pyopencl/__init__.py | 1 - src/wrap_cl.hpp | 24 ------------------------ src/wrap_cl_part_2.cpp | 5 ----- 3 files changed, 30 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index f0178c97..ffd03907 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -143,7 +143,6 @@ from pyopencl._cl import ( # noqa UserEvent, enqueue_nd_range_kernel, - enqueue_task, _enqueue_marker_with_wait_list, _enqueue_marker, diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 09fa3188..35e10667 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3853,30 +3853,6 @@ namespace pyopencl } ); } - - - - - - inline - event *enqueue_task( - command_queue &cq, - kernel &knl, - py::object py_wait_for) - { - PYOPENCL_PARSE_WAIT_FOR; - - PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { - cl_event evt; - PYOPENCL_CALL_GUARDED(clEnqueueTask, ( - cq.data(), - knl.data(), - PYOPENCL_WAITLIST_ARGS, &evt - )); - PYOPENCL_RETURN_NEW_EVENT(evt); - } ); - } - // }}} diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index c5b5e612..0a31ac6c 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -354,11 +354,6 @@ void pyopencl_expose_part_2(py::module &m) py::arg("wait_for")=py::none(), py::arg("g_times_l")=false ); - m.def("enqueue_task", enqueue_task, - py::arg("queue"), - py::arg("kernel"), - py::arg("wait_for")=py::none() - ); // TODO: clEnqueueNativeKernel // }}} -- GitLab From 39907d60a30b7a7f9280ce4d92f1da869a008d80 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 16:43:03 -0500 Subject: [PATCH 33/92] Remove support for device_affinity_ext (in core as of CL 1.2) --- src/wrap_cl.hpp | 87 +----------------------------------------- src/wrap_cl_part_1.cpp | 3 -- src/wrap_constants.cpp | 36 ----------------- 3 files changed, 1 insertion(+), 125 deletions(-) diff 
--git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 35e10667..42a8205b 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -534,7 +534,6 @@ namespace pyopencl public: enum reference_type_t { REF_NOT_OWNABLE, - REF_FISSION_EXT, #if PYOPENCL_CL_VERSION >= 0x1020 REF_CL_1_2, #endif @@ -555,21 +554,6 @@ namespace pyopencl { if (false) { } -#if (defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION)) - else if (ref_type == REF_FISSION_EXT) - { -#if PYOPENCL_CL_VERSION >= 0x1020 - cl_platform_id plat; - PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, - sizeof(plat), &plat, NULL)); -#endif - - PYOPENCL_GET_EXT_FUN(plat, - clRetainDeviceEXT, retain_func); - - PYOPENCL_CALL_GUARDED(retain_func, (did)); - } -#endif #if PYOPENCL_CL_VERSION >= 0x1020 else if (ref_type == REF_CL_1_2) @@ -586,26 +570,8 @@ namespace pyopencl ~device() { - if (false) - { } -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - else if (m_ref_type == REF_FISSION_EXT) - { #if PYOPENCL_CL_VERSION >= 0x1020 - cl_platform_id plat; - PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, - sizeof(plat), &plat, NULL)); -#endif - - PYOPENCL_GET_EXT_FUN(plat, - clReleaseDeviceEXT, release_func); - - PYOPENCL_CALL_GUARDED_CLEANUP(release_func, (m_device)); - } -#endif - -#if PYOPENCL_CL_VERSION >= 0x1020 - else if (m_ref_type == REF_CL_1_2) + if (m_ref_type == REF_CL_1_2) PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseDevice, (m_device)); #endif } @@ -721,19 +687,6 @@ namespace pyopencl case CL_DEVICE_INTEGRATED_MEMORY_NV: DEV_GET_INT_INF(cl_bool); #endif -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - case CL_DEVICE_PARENT_DEVICE_EXT: - PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); - case CL_DEVICE_PARTITION_TYPES_EXT: - case CL_DEVICE_AFFINITY_DOMAINS_EXT: - case CL_DEVICE_PARTITION_STYLE_EXT: - { - std::vector result; - PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, 
result); - PYOPENCL_RETURN_VECTOR(cl_device_partition_property_ext, result); - } - case CL_DEVICE_REFERENCE_COUNT_EXT: DEV_GET_INT_INF(cl_uint); -#endif #if PYOPENCL_CL_VERSION >= 0x1020 case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); case CL_DEVICE_BUILT_IN_KERNELS: @@ -852,44 +805,6 @@ namespace pyopencl } #endif -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - py::list create_sub_devices_ext(py::object py_properties) - { - std::vector properties; - -#if PYOPENCL_CL_VERSION >= 0x1020 - cl_platform_id plat; - PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, - sizeof(plat), &plat, NULL)); -#endif - - PYOPENCL_GET_EXT_FUN(plat, clCreateSubDevicesEXT, create_sub_dev); - - COPY_PY_LIST(cl_device_partition_property_ext, properties); - properties.push_back(CL_PROPERTIES_LIST_END_EXT); - - cl_device_partition_property_ext *props_ptr - = properties.empty( ) ? NULL : &properties.front(); - - cl_uint num_entries; - PYOPENCL_CALL_GUARDED(create_sub_dev, - (m_device, props_ptr, 0, NULL, &num_entries)); - - std::vector result; - result.resize(num_entries); - - PYOPENCL_CALL_GUARDED(create_sub_dev, - (m_device, props_ptr, num_entries, &result.front(), NULL)); - - py::list py_result; - for (cl_device_id did: result) - py_result.append(handle_from_new_ptr( - new pyopencl::device(did, /*retain*/true, - device::REF_FISSION_EXT))); - return py_result; - } -#endif - }; diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 82e231d7..785427ee 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -35,9 +35,6 @@ void pyopencl_expose_part_1(py::module &m) .def(py::self == py::self) .def(py::self != py::self) .def("__hash__", &cls::hash) -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - .DEF_SIMPLE_METHOD(create_sub_devices_ext) -#endif #if PYOPENCL_CL_VERSION >= 0x1020 .DEF_SIMPLE_METHOD(create_sub_devices) #endif diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp 
index 495850d5..3356eba5 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -49,9 +49,6 @@ namespace class device_partition_property { }; class device_affinity_domain { }; - class device_partition_property_ext { }; - class affinity_domain_ext { }; - class gl_object_type { }; class gl_texture_info { }; @@ -344,13 +341,6 @@ void pyopencl_expose_constants(py::module &m) #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT ADD_ATTR(DEVICE_, MAX_ATOMIC_COUNTERS_EXT); #endif -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - ADD_ATTR(DEVICE_, PARENT_DEVICE_EXT); - ADD_ATTR(DEVICE_, PARTITION_TYPES_EXT); - ADD_ATTR(DEVICE_, AFFINITY_DOMAINS_EXT); - ADD_ATTR(DEVICE_, REFERENCE_COUNT_EXT); - ADD_ATTR(DEVICE_, PARTITION_STYLE_EXT); -#endif #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(DEVICE_, LINKER_AVAILABLE); ADD_ATTR(DEVICE_, BUILT_IN_KERNELS); @@ -775,32 +765,6 @@ void pyopencl_expose_constants(py::module &m) #endif } - { - py::class_ cls( - m, "device_partition_property_ext"); -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - ADD_ATTR_SUFFIX(DEVICE_PARTITION_, EQUALLY, _EXT); - ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_COUNTS, _EXT); - ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_NAMES, _EXT); - ADD_ATTR_SUFFIX(DEVICE_PARTITION_, BY_AFFINITY_DOMAIN, _EXT); - ADD_ATTR_SUFFIX(, PROPERTIES_LIST_END, _EXT); - ADD_ATTR_SUFFIX(, PARTITION_BY_COUNTS_LIST_END, _EXT); - ADD_ATTR_SUFFIX(, PARTITION_BY_NAMES_LIST_END, _EXT); -#endif - } - - { - py::class_ cls(m, "affinity_domain_ext"); -#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L1_CACHE, _EXT); - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L2_CACHE, _EXT); - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L3_CACHE, _EXT); - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, L4_CACHE, _EXT); - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, NUMA, _EXT); - ADD_ATTR_SUFFIX(AFFINITY_DOMAIN_, NEXT_FISSIONABLE, _EXT); -#endif - } - { py::class_ cls( m, 
"device_partition_property"); -- GitLab From 6071d90d03c4e129a4ffe969f81c5ab94744dd46 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:29:46 -0500 Subject: [PATCH 34/92] Fix potential uninitalized var use --- src/wrap_cl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 42a8205b..1a4cbd48 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -2330,7 +2330,7 @@ namespace pyopencl "'shape' must be given"); void *buf = 0; - PYOPENCL_BUFFER_SIZE_T len; + PYOPENCL_BUFFER_SIZE_T len = 0; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE std::unique_ptr retained_buf_obj; -- GitLab From 582457c4a7ebb824c87dc1427e900702f5e2e873 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:30:05 -0500 Subject: [PATCH 35/92] Wrap CL2 constants --- pyopencl/__init__.py | 21 ++---- src/wrap_cl.hpp | 98 +++++++++++++++++++++--- src/wrap_constants.cpp | 168 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+), 24 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index ffd03907..0b3de63a 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -64,20 +64,17 @@ from pyopencl._cl import ( # noqa device_mem_cache_type, device_local_mem_type, device_exec_capabilities, - # FIXME - # device_svm_capabilities, + device_svm_capabilities, command_queue_properties, context_info, gl_context_info, context_properties, command_queue_info, - # FIXME - # queue_properties, + queue_properties, mem_flags, - # FIXME - # svm_mem_flags, + svm_mem_flags, channel_order, channel_type, @@ -94,10 +91,9 @@ from pyopencl._cl import ( # noqa kernel_info, kernel_arg_info, - # FIXME - # kernel_arg_address_qualifier, - # kernel_arg_access_qualifier, - # kernel_arg_type_qualifier, + kernel_arg_address_qualifier, + kernel_arg_access_qualifier, + kernel_arg_type_qualifier, kernel_work_group_info, event_info, @@ -105,8 +101,7 @@ from pyopencl._cl import ( # noqa 
command_execution_status, profiling_info, mem_migration_flags, - # FIXME - # mem_migration_flags_ext, + mem_migration_flags_ext, device_partition_property, device_affinity_domain, gl_object_type, @@ -132,8 +127,6 @@ from pyopencl._cl import ( # noqa # SVM, # SVMMap, - # FIXME - # CompilerWarning, _Program, Kernel, diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 1a4cbd48..ab735ea1 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -648,7 +648,11 @@ namespace pyopencl case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool); case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool); case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities); +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); +#else case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); +#endif case CL_DEVICE_NAME: case CL_DEVICE_VENDOR: @@ -687,6 +691,30 @@ namespace pyopencl case CL_DEVICE_INTEGRATED_MEMORY_NV: DEV_GET_INT_INF(cl_bool); #endif +#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV + case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PCI_BUS_ID_NV + case CL_DEVICE_PCI_BUS_ID_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PCI_SLOT_ID_NV + case CL_DEVICE_PCI_SLOT_ID_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD + case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: DEV_GET_INT_INF(cl_bool); +#endif +#ifdef CL_DEVICE_GFXIP_MAJOR_AMD + case CL_DEVICE_GFXIP_MAJOR_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GFXIP_MINOR_AMD + case CL_DEVICE_GFXIP_MINOR_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD + case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: DEV_GET_INT_INF(cl_uint); +#endif #if PYOPENCL_CL_VERSION >= 0x1020 case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); case CL_DEVICE_BUILT_IN_KERNELS: @@ -713,7 +741,7 @@ namespace 
pyopencl case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool); case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool); #endif -// {{{ AMD dev attrs +// {{{ AMD dev attrs cl_amd_device_attribute_query // // types of AMD dev attrs divined from // https://www.khronos.org/registry/cl/api/1.2/cl.hpp @@ -740,15 +768,6 @@ namespace pyopencl #ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); #endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD - case CL_DEVICE_SIMD_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD - case CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - case CL_DEVICE_WAVEFRONT_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); -#endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint); #endif @@ -769,6 +788,56 @@ namespace pyopencl #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint); #endif +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); + case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_ON_DEVICE_QUEUES: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_ON_DEVICE_EVENTS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SVM_CAPABILITIES: DEV_GET_INT_INF(cl_device_svm_capabilities); + case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_MAX_PIPE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PIPE_MAX_PACKET_SIZE: DEV_GET_INT_INF(cl_uint); + case 
CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); +#endif +#if PYOPENCL_CL_VERSION >= 0x2010 + case CL_DEVICE_IL_VERSION: DEV_GET_INT_INF(); + case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(); + case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(); +#endif +#ifdef CL_DEVICE_ME_VERSION_INTEL + case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM + case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PAGE_SIZE_QCOM + case CL_DEVICE_PAGE_SIZE_QCOM: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_SPIR_VERSIONS + case CL_DEVICE_SPIR_VERSIONS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA + case CL_DEVICE_CORE_TEMPERATURE_ALTERA: DEV_GET_INT_INF(cl_int); +#endif + +#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_uint, result); + } +#endif +#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: DEV_GET_INT_INF(cl_uint); +#endif default: throw error("Device.get_info", CL_INVALID_VALUE); @@ -1255,6 +1324,9 @@ namespace pyopencl case CL_PROFILING_COMMAND_SUBMIT: case CL_PROFILING_COMMAND_START: case CL_PROFILING_COMMAND_END: +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_PROFILING_COMMAND_COMPLETE: +#endif PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name, cl_ulong); default: @@ -3177,6 +3249,12 @@ namespace pyopencl PYOPENCL_FIRST_ARG, param_name, cl_program_binary_type); #endif +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: + 
PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + size_t); +#endif #undef PYOPENCL_FIRST_ARG default: diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index 3356eba5..7bf410c9 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -15,6 +15,7 @@ namespace class device_mem_cache_type { }; class device_local_mem_type { }; class device_exec_capabilities { }; + class device_svm_capabilities { }; class command_queue_properties { }; class context_info { }; class gl_context_info { }; @@ -38,6 +39,7 @@ namespace class kernel_arg_info { }; class kernel_arg_address_qualifier { }; class kernel_arg_access_qualifier { }; + class kernel_arg_type_qualifier { }; class kernel_work_group_info { }; class event_info { }; class command_type { }; @@ -186,6 +188,11 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(, INVALID_DEVICE_PARTITION_COUNT); #endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(, INVALID_PIPE_SIZE); + ADD_ATTR(, INVALID_DEVICE_QUEUE); +#endif + #if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) ADD_ATTR(, DEVICE_PARTITION_FAILED_EXT); ADD_ATTR(, INVALID_PARTITION_COUNT_EXT); @@ -267,6 +274,9 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(DEVICE_, COMPILER_AVAILABLE); ADD_ATTR(DEVICE_, EXECUTION_CAPABILITIES); ADD_ATTR(DEVICE_, QUEUE_PROPERTIES); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(DEVICE_, QUEUE_ON_HOST_PROPERTIES); +#endif ADD_ATTR(DEVICE_, NAME); ADD_ATTR(DEVICE_, VENDOR); ADD_ATTR(, DRIVER_VERSION); @@ -296,6 +306,16 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(DEVICE_, GPU_OVERLAP_NV); ADD_ATTR(DEVICE_, KERNEL_EXEC_TIMEOUT_NV); ADD_ATTR(DEVICE_, INTEGRATED_MEMORY_NV); + // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h +#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV + ADD_ATTR(DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV); +#endif +#ifdef CL_DEVICE_PCI_BUS_ID_NV + ADD_ATTR(DEVICE_, PCI_BUS_ID_NV); +#endif +#ifdef 
CL_DEVICE_PCI_SLOT_ID_NV + ADD_ATTR(DEVICE_, PCI_SLOT_ID_NV); +#endif #endif // {{{ cl_amd_device_attribute_query #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD @@ -338,6 +358,19 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(DEVICE_, LOCAL_MEM_BANKS_AMD); #endif // }}} +#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD + ADD_ATTR(DEVICE_, THREAD_TRACE_SUPPORTED_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MAJOR_AMD + ADD_ATTR(DEVICE_, GFXIP_MAJOR_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MINOR_AMD + ADD_ATTR(DEVICE_, GFXIP_MINOR_AMD); +#endif +#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD + ADD_ATTR(DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD); +#endif + #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT ADD_ATTR(DEVICE_, MAX_ATOMIC_COUNTERS_EXT); #endif @@ -358,6 +391,58 @@ void pyopencl_expose_constants(py::module &m) #ifdef cl_khr_image2d_from_buffer ADD_ATTR(DEVICE_, IMAGE_PITCH_ALIGNMENT); ADD_ATTR(DEVICE_, IMAGE_BASE_ADDRESS_ALIGNMENT); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(DEVICE_, MAX_READ_WRITE_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_GLOBAL_VARIABLE_SIZE); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_PROPERTIES); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_PREFERRED_SIZE); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_MAX_SIZE); + ADD_ATTR(DEVICE_, MAX_ON_DEVICE_QUEUES); + ADD_ATTR(DEVICE_, MAX_ON_DEVICE_EVENTS); + ADD_ATTR(DEVICE_, SVM_CAPABILITIES); + ADD_ATTR(DEVICE_, GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE); + ADD_ATTR(DEVICE_, MAX_PIPE_ARGS); + ADD_ATTR(DEVICE_, PIPE_MAX_ACTIVE_RESERVATIONS); + ADD_ATTR(DEVICE_, PIPE_MAX_PACKET_SIZE); + ADD_ATTR(DEVICE_, PREFERRED_PLATFORM_ATOMIC_ALIGNMENT); + ADD_ATTR(DEVICE_, PREFERRED_GLOBAL_ATOMIC_ALIGNMENT); + ADD_ATTR(DEVICE_, PREFERRED_LOCAL_ATOMIC_ALIGNMENT); +#endif +#if PYOPENCL_CL_VERSION >= 0x2010 + ADD_ATTR(DEVICE_, IL_VERSION); + ADD_ATTR(DEVICE_, MAX_NUM_SUB_GROUPS); + ADD_ATTR(DEVICE_, SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS); +#endif + /* cl_intel_advanced_motion_estimation */ +#ifdef CL_DEVICE_ME_VERSION_INTEL + ADD_ATTR(DEVICE_, 
ME_VERSION_INTEL); +#endif + + /* cl_qcom_ext_host_ptr */ +#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM + ADD_ATTR(DEVICE_, EXT_MEM_PADDING_IN_BYTES_QCOM); +#endif +#ifdef CL_DEVICE_PAGE_SIZE_QCOM + ADD_ATTR(DEVICE_, PAGE_SIZE_QCOM); +#endif + + /* cl_khr_spir */ +#ifdef CL_DEVICE_SPIR_VERSIONS + ADD_ATTR(DEVICE_, SPIR_VERSIONS); +#endif + + /* cl_altera_device_temperature */ +#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA + ADD_ATTR(DEVICE_, CORE_TEMPERATURE_ALTERA); +#endif + + /* cl_intel_simultaneous_sharing */ +#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL + ADD_ATTR(DEVICE_, SIMULTANEOUS_INTEROPS_INTEL); +#endif +#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL + ADD_ATTR(DEVICE_, NUM_SIMULTANEOUS_INTEROPS_INTEL); #endif } @@ -399,12 +484,27 @@ void pyopencl_expose_constants(py::module &m) #endif } + { + py::class_ cls(m, "device_svm_capabilities"); +#if PYOPENCL_CL_VERSION >= 0x2000 + // device_svm_capabilities + ADD_ATTR(DEVICE_SVM_, COARSE_GRAIN_BUFFER); + ADD_ATTR(DEVICE_SVM_, FINE_GRAIN_BUFFER); + ADD_ATTR(DEVICE_SVM_, FINE_GRAIN_SYSTEM); + ADD_ATTR(DEVICE_SVM_, ATOMICS); +#endif + } + { py::class_ cls(m, "command_queue_properties"); ADD_ATTR(QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); ADD_ATTR(QUEUE_, PROFILING_ENABLE); #ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL ADD_ATTR(QUEUE_, IMMEDIATE_EXECUTION_ENABLE_INTEL); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(QUEUE_, ON_DEVICE); + ADD_ATTR(QUEUE_, ON_DEVICE_DEFAULT); #endif } @@ -456,6 +556,15 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(QUEUE_, PROPERTIES); } + { + // queue_properties +#if PYOPENCL_CL_VERSION >= 0x2000 + py::class_ cls(m, "queue_properties"); + ADD_ATTR(QUEUE_, PROPERTIES); + ADD_ATTR(QUEUE_, SIZE); +#endif + } + { py::class_ cls(m, "mem_flags"); ADD_ATTR(MEM_, READ_WRITE); @@ -471,6 +580,20 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(MEM_, HOST_WRITE_ONLY); ADD_ATTR(MEM_, HOST_READ_ONLY); ADD_ATTR(MEM_, HOST_NO_ACCESS); +#endif +#if PYOPENCL_CL_VERSION 
>= 0x2000 + ADD_ATTR(MEM_, KERNEL_READ_AND_WRITE); +#endif + } + + { +#if PYOPENCL_CL_VERSION >= 0x2000 + py::class_ cls(m, "svm_mem_flags"); + ADD_ATTR(MEM_, READ_WRITE); + ADD_ATTR(MEM_, WRITE_ONLY); + ADD_ATTR(MEM_, READ_ONLY); + ADD_ATTR(MEM_, SVM_FINE_GRAIN_BUFFER); + ADD_ATTR(MEM_, SVM_ATOMICS); #endif } @@ -489,6 +612,13 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR( , Rx); ADD_ATTR( , RGx); ADD_ATTR( , RGBx); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR( , sRGB); + ADD_ATTR( , sRGBx); + ADD_ATTR( , sRGBA); + ADD_ATTR( , sBGRA); + ADD_ATTR( , ABGR); #endif } @@ -521,6 +651,9 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(MEM_OBJECT_, IMAGE1D); ADD_ATTR(MEM_OBJECT_, IMAGE1D_ARRAY); ADD_ATTR(MEM_OBJECT_, IMAGE1D_BUFFER); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_OBJECT_, PIPE); #endif } @@ -536,6 +669,9 @@ void pyopencl_expose_constants(py::module &m) #if PYOPENCL_CL_VERSION >= 0x1010 ADD_ATTR(MEM_, ASSOCIATED_MEMOBJECT); ADD_ATTR(MEM_, OFFSET); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_, USES_SVM_POINTER); #endif } @@ -580,6 +716,11 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(SAMPLER_, NORMALIZED_COORDS); ADD_ATTR(SAMPLER_, ADDRESSING_MODE); ADD_ATTR(SAMPLER_, FILTER_MODE); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(SAMPLER_, MIP_FILTER_MODE); + ADD_ATTR(SAMPLER_, LOD_MIN); + ADD_ATTR(SAMPLER_, LOD_MAX); +#endif } { @@ -613,6 +754,9 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(PROGRAM_BUILD_, LOG); #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(PROGRAM_, BINARY_TYPE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(PROGRAM_BUILD_, GLOBAL_VARIABLE_TOTAL_SIZE); #endif } @@ -670,6 +814,20 @@ void pyopencl_expose_constants(py::module &m) #endif } + { + py::class_ cls( + m, "kernel_arg_type_qualifier"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_TYPE_, NONE); + ADD_ATTR(KERNEL_ARG_TYPE_, CONST); + ADD_ATTR(KERNEL_ARG_TYPE_, RESTRICT); + 
ADD_ATTR(KERNEL_ARG_TYPE_, VOLATILE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(KERNEL_ARG_TYPE_, PIPE); +#endif + } + { py::class_ cls(m, "kernel_work_group_info"); ADD_ATTR(KERNEL_, WORK_GROUP_SIZE); @@ -728,6 +886,13 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECTS); ADD_ATTR(COMMAND_, FILL_BUFFER); ADD_ATTR(COMMAND_, FILL_IMAGE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(COMMAND_, SVM_FREE); + ADD_ATTR(COMMAND_, SVM_MEMCPY); + ADD_ATTR(COMMAND_, SVM_MEMFILL); + ADD_ATTR(COMMAND_, SVM_MAP); + ADD_ATTR(COMMAND_, SVM_UNMAP); #endif } @@ -745,6 +910,9 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(PROFILING_COMMAND_, SUBMIT); ADD_ATTR(PROFILING_COMMAND_, START); ADD_ATTR(PROFILING_COMMAND_, END); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR("profiling_info", PROFILING_COMMAND_, COMPLETE); +#endif } /* not needed--filled in automatically by implementation. -- GitLab From 294da2639f572058df0a919cb711918cf6d9befc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:45:42 -0500 Subject: [PATCH 36/92] [pybind] versioning support for newer CL versions --- src/wrap_cl.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index ab735ea1..94efb804 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -59,7 +59,13 @@ #define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION #else -#if defined(CL_VERSION_1_2) +#if defined(CL_VERSION_2_2) +#define PYOPENCL_CL_VERSION 0x2020 +#elif defined(CL_VERSION_2_1) +#define PYOPENCL_CL_VERSION 0x2010 +#elif defined(CL_VERSION_2_0) +#define PYOPENCL_CL_VERSION 0x2000 +#elif defined(CL_VERSION_1_2) #define PYOPENCL_CL_VERSION 0x1020 #elif defined(CL_VERSION_1_1) #define PYOPENCL_CL_VERSION 0x1010 -- GitLab From c588d9f6c6502e1dccc7a7a950862dba4e96c568 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:46:05 -0500 Subject: [PATCH 37/92] [pybind] Fixes for CL2 
constants --- pyopencl/__init__.py | 2 -- setup.py | 2 +- src/wrap_cl.hpp | 7 ++++--- src/wrap_constants.cpp | 9 +++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 0b3de63a..eb124749 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -101,12 +101,10 @@ from pyopencl._cl import ( # noqa command_execution_status, profiling_info, mem_migration_flags, - mem_migration_flags_ext, device_partition_property, device_affinity_domain, gl_object_type, gl_texture_info, - migrate_mem_object_flags_ext, Error, MemoryError, LogicError, RuntimeError, diff --git a/setup.py b/setup.py index b8b44f40..0e6aaf80 100644 --- a/setup.py +++ b/setup.py @@ -223,10 +223,10 @@ def main(): ext_modules=[ NumpyExtension("pyopencl._cl", [ + "src/wrap_constants.cpp", "src/wrap_cl.cpp", "src/wrap_cl_part_1.cpp", "src/wrap_cl_part_2.cpp", - "src/wrap_constants.cpp", "src/wrap_mempool.cpp", "src/bitlog.cpp", ], diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 94efb804..03173cd6 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -812,9 +812,10 @@ namespace pyopencl case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); #endif #if PYOPENCL_CL_VERSION >= 0x2010 - case CL_DEVICE_IL_VERSION: DEV_GET_INT_INF(); - case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(); - case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(); + case CL_DEVICE_IL_VERSION: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(cl_bool); #endif #ifdef CL_DEVICE_ME_VERSION_INTEL case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint); diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index 7bf410c9..0627fe85 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -21,7 +21,9 @@ namespace class gl_context_info { }; class context_properties { }; 
class command_queue_info { }; + class queue_properties { }; class mem_flags { }; + class svm_mem_flags { }; class channel_order { }; class channel_type { }; class mem_object_type { }; @@ -557,9 +559,8 @@ void pyopencl_expose_constants(py::module &m) } { - // queue_properties -#if PYOPENCL_CL_VERSION >= 0x2000 py::class_ cls(m, "queue_properties"); +#if PYOPENCL_CL_VERSION >= 0x2000 ADD_ATTR(QUEUE_, PROPERTIES); ADD_ATTR(QUEUE_, SIZE); #endif @@ -587,8 +588,8 @@ void pyopencl_expose_constants(py::module &m) } { -#if PYOPENCL_CL_VERSION >= 0x2000 py::class_ cls(m, "svm_mem_flags"); +#if PYOPENCL_CL_VERSION >= 0x2000 ADD_ATTR(MEM_, READ_WRITE); ADD_ATTR(MEM_, WRITE_ONLY); ADD_ATTR(MEM_, READ_ONLY); @@ -911,7 +912,7 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(PROFILING_COMMAND_, START); ADD_ATTR(PROFILING_COMMAND_, END); #if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("profiling_info", PROFILING_COMMAND_, COMPLETE); + ADD_ATTR(PROFILING_COMMAND_, COMPLETE); #endif } -- GitLab From 567cdc0428e4666d7d27114fa518ecd1a96906fc Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:55:51 -0500 Subject: [PATCH 38/92] Also remove test_enqueue_task --- test/test_wrapper.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 03418480..e8f5c36b 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -665,36 +665,6 @@ def test_unload_compiler(platform): cl.unload_platform_compiler(platform) -def test_enqueue_task(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - mf = cl.mem_flags - - prg = cl.Program(ctx, """ - __kernel void - reverse(__global const float *in, __global float *out, int n) - { - for (int i = 0;i < n;i++) { - out[i] = in[n - 1 - i]; - } - } - """).build() - knl = prg.reverse - - n = 100 - a = np.random.rand(n).astype(np.float32) - b = np.empty_like(a) - - buf1 = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) - buf2 = 
cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) - - knl.set_args(buf1, buf2, np.int32(n)) - cl.enqueue_task(queue, knl) - - cl.enqueue_copy(queue, b, buf2).wait() - assert la.norm(a[::-1] - b) == 0 - - def test_platform_get_devices(ctx_factory): ctx = ctx_factory() platform = ctx.devices[0].platform -- GitLab From 292d43a7a01cc95061d374736894bafe43d8e61b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 17:56:13 -0500 Subject: [PATCH 39/92] Add context manager functionality for MemoryMap --- pyopencl/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index eb124749..472d7dc0 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1033,6 +1033,19 @@ def _add_functionality(): # }}} + # {{{ MemoryMap + + def memory_map_enter(self): + return self + + def memory_map_exit(self, exc_type, exc_val, exc_tb): + self.release() + + MemoryMap.__enter__ = memory_map_enter + MemoryMap.__exit__ = memory_map_exit + + # }}} + # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs # to be visible through the attributes. So get_info attr creation needs to happen # after the overriding is complete. 
-- GitLab From 159e844039972d1b5470741196d16dfb03a8c551 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 23:35:17 -0500 Subject: [PATCH 40/92] [pybind] Add wrapper for DeviceTopologyAmd --- pyopencl/__init__.py | 3 +-- src/wrap_cl_part_2.cpp | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 472d7dc0..3d2547e0 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -176,8 +176,7 @@ from pyopencl._cl import ( # noqa Image, Sampler, GLTexture, - # FIXME - # DeviceTopologyAmd, + DeviceTopologyAmd, ) import inspect as _inspect diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 0a31ac6c..1c62f2b2 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -441,6 +441,42 @@ void pyopencl_expose_part_2(py::module &m) #endif // }}} + + // {{{ CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD + + { + typedef cl_device_topology_amd cls; + py::class_(m, "DeviceTopologyAmd") + .def(py::init( + [](cl_char bus, cl_char device, cl_char function) + { + cl_device_topology_amd result; + result.pcie.bus = bus; + result.pcie.device = device; + result.pcie.function = function; + return result; + }), + py::arg("bus")=0, + py::arg("device")=0, + py::arg("function")=0) + + .def_property("type", + [](cls &t) { return t.pcie.type; }, + [](cls &t, cl_uint val) { t.pcie.type = val; }) + + .def_property("bus", + [](cls &t) { return t.pcie.bus; }, + [](cls &t, cl_char val) { t.pcie.bus = val; }) + .def_property("device", + [](cls &t) { return t.pcie.device; }, + [](cls &t, cl_char val) { t.pcie.device = val; }) + .def_property("function", + [](cls &t) { return t.pcie.function; }, + [](cls &t, cl_char val) { t.pcie.function = val; }) + ; + } + + // }}} } -- GitLab From 360fc25e2b0462e3212ad2788721fccc5012207e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 10 Aug 2018 23:35:42 -0500 Subject: [PATCH 41/92] Fix svm tests, add 
characterize.has_*_svm --- pyopencl/characterize/__init__.py | 30 ++++++++++++++++++++++++++++++ test/test_wrapper.py | 25 +++++++------------------ 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py index 26a4a688..873e1c11 100644 --- a/pyopencl/characterize/__init__.py +++ b/pyopencl/characterize/__init__.py @@ -387,3 +387,33 @@ def has_struct_arg_count_bug(dev, ctx=None): return "pocl" return False + + +def _may_have_svm(dev): + has_svm = (dev.platform._get_cl_version() >= (2, 0) and + cl.get_cl_header_version() >= (2, 0)) + + if dev.platform.name == "Portable Computing Language": + has_svm = ( + get_pocl_version(dev.platform) >= (1, 0) + and cl.get_cl_header_version() >= (2, 0)) + + return has_svm + + +def has_coarse_grain_buffer_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.COARSE_GRAIN_BUFFER)) + + +def has_fine_grain_buffer_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.FINE_GRAIN_BUFFER)) + + +def has_fine_grain_system_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.FINE_GRAIN_SYSTEM)) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index e8f5c36b..bba4ca6a 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -306,7 +306,7 @@ def test_device_topology_amd_constructor(): assert topol.device == 4 assert topol.function == 5 - assert not topol.__dict__ + assert not hasattr(topol, "__dict__") def test_nonempty_supported_image_formats(ctx_factory): @@ -922,17 +922,10 @@ def test_coarse_grain_svm(ctx_factory): dev = ctx.devices[0] - has_svm = (ctx._get_cl_version() >= (2, 0) and - cl.get_cl_header_version() >= (2, 0)) - - if dev.platform.name == "Portable Computing Language": - has_svm = ( - get_pocl_version(dev.platform) >= (1, 0) - and cl.get_cl_header_version() >= (2, 0)) - - if not has_svm: - from 
pytest import skip - skip("SVM only available in OpenCL 2.0 and higher") + from pyopencl.characterize import has_coarse_grain_buffer_svm + from pytest import skip + if not has_coarse_grain_buffer_svm(queue.device): + skip("device does not support coarse-grain SVM") if ("AMD" in dev.platform.name and dev.type & cl.device_type.CPU): @@ -981,13 +974,9 @@ def test_fine_grain_svm(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + from pyopencl.characterize import has_fine_grain_buffer_svm from pytest import skip - if (ctx._get_cl_version() < (2, 0) or - cl.get_cl_header_version() < (2, 0)): - skip("SVM only available in OpenCL 2.0 and higher") - - if not (ctx.devices[0].svm_capabilities - & cl.device_svm_capabilities.FINE_GRAIN_BUFFER): + if not has_fine_grain_buffer_svm(queue.device): skip("device does not support fine-grain SVM") n = 3000 -- GitLab From 91e79b66601d3ef9527e40b60b0c63d907b739b1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:19:24 -0500 Subject: [PATCH 42/92] PYTHON_FOREACH -> range-based for --- src/wrap_cl.hpp | 18 +++++++++--------- src/wrap_helpers.hpp | 3 --- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 03173cd6..c46191c8 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -149,7 +149,7 @@ } \ else \ { \ - PYTHON_FOREACH(py_dev, py_devices) \ + for (py::handle py_dev: py_devices) \ devices_vec.push_back( \ (py_dev).cast().data()); \ num_devices = devices_vec.size(); \ @@ -1034,7 +1034,7 @@ namespace pyopencl if (py_properties.ptr() != Py_None) { - PYTHON_FOREACH(prop_tuple_py, py_properties) + for (py::handle prop_tuple_py: py_properties) { py::tuple prop_tuple(prop_tuple_py.cast()); @@ -1423,7 +1423,7 @@ namespace pyopencl cl_uint num_events_in_wait_list = 0; std::vector event_wait_list(len(events)); - PYTHON_FOREACH(evt, events) + for (py::handle evt: events) event_wait_list[num_events_in_wait_list++] = evt.cast().data(); @@ -1482,7 +1482,7 @@ 
namespace pyopencl cl_uint num_events = 0; std::vector event_list(len(py_events)); - PYTHON_FOREACH(py_evt, py_events) + for (py::handle py_evt: py_events) event_list[num_events++] = py_evt.cast().data(); PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( @@ -1646,7 +1646,7 @@ namespace pyopencl PYOPENCL_PARSE_WAIT_FOR; std::vector mem_objects; - PYTHON_FOREACH(mo, py_mem_objects) + for (py::handle mo: py_mem_objects) mem_objects.push_back(mo.cast().data()); cl_event evt; @@ -1687,7 +1687,7 @@ namespace pyopencl clEnqueueMigrateMemObjectEXT, enqueue_migrate_fn); std::vector mem_objects; - PYTHON_FOREACH(mo, py_mem_objects) + for (py::handle mo: py_mem_objects) mem_objects.push_back(mo.cast().data()); cl_event evt; @@ -3289,7 +3289,7 @@ namespace pyopencl std::vector header_names; std::vector programs; - PYTHON_FOREACH(name_hdr_tup_py, py_headers) + for (py::handle name_hdr_tup_py: py_headers) { py::tuple name_hdr_tup = py::reinterpret_borrow(name_hdr_tup_py); if (py::len(name_hdr_tup) != 2) @@ -3467,7 +3467,7 @@ namespace pyopencl PYOPENCL_PARSE_PY_DEVICES; std::vector programs; - PYTHON_FOREACH(py_prg, py_programs) + for (py::handle py_prg: py_programs) { program &prg = (py_prg).cast(); programs.push_back(prg.data()); @@ -4015,7 +4015,7 @@ namespace pyopencl PYOPENCL_PARSE_WAIT_FOR; \ \ std::vector mem_objects; \ - PYTHON_FOREACH(mo, py_mem_objects) \ + for (py::handle mo: py_mem_objects) \ mem_objects.push_back((mo).cast().data()); \ \ cl_event evt; \ diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index 1635a684..d9b2389d 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -39,9 +39,6 @@ namespace py = pybind11; #define DEF_SIMPLE_RW_MEMBER(NAME) \ def_readwrite(#NAME, &cls::m_##NAME) -#define PYTHON_FOREACH(NAME, ITERABLE) \ - for (py::handle NAME: ITERABLE) - #define COPY_PY_LIST(TYPE, NAME) \ { \ for (auto it: py_##NAME) \ -- GitLab From 734ec09ebbc281309f19cc21a4aad620c7408e40 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 
Aug 2018 21:20:34 -0500 Subject: [PATCH 43/92] NULL -> nullptr --- src/wrap_cl.hpp | 86 ++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index c46191c8..8b764bac 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -153,7 +153,7 @@ devices_vec.push_back( \ (py_dev).cast().data()); \ num_devices = devices_vec.size(); \ - devices = devices_vec.empty( ) ? NULL : &devices_vec.front(); \ + devices = devices_vec.empty( ) ? nullptr : &devices_vec.front(); \ } \ @@ -303,7 +303,7 @@ \ PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ (FIRST_ARG, SECOND_ARG, size, \ - RES_VEC.empty( ) ? NULL : &RES_VEC.front(), &size)); \ + RES_VEC.empty( ) ? nullptr : &RES_VEC.front(), &size)); \ } #define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \ @@ -315,7 +315,7 @@ std::vector param_value(param_value_size); \ PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ (FIRST_ARG, SECOND_ARG, param_value_size, \ - param_value.empty( ) ? NULL : &param_value.front(), &param_value_size)); \ + param_value.empty( ) ? nullptr : &param_value.front(), &param_value_size)); \ \ return py::cast( \ param_value.empty( ) ? "" : std::string(&param_value.front(), param_value_size-1)); \ @@ -342,13 +342,13 @@ if (py_wait_for.ptr() != Py_None) \ { \ event_wait_list.resize(len(py_wait_for)); \ - PYTHON_FOREACH(evt, py_wait_for) \ + for (py::handle evt: py_wait_for) \ event_wait_list[num_events_in_wait_list++] = \ evt.cast().data(); \ } #define PYOPENCL_WAITLIST_ARGS \ - num_events_in_wait_list, event_wait_list.empty( ) ? NULL : &event_wait_list.front() + num_events_in_wait_list, event_wait_list.empty( ) ? nullptr : &event_wait_list.front() #define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \ try \ @@ -520,7 +520,7 @@ namespace pyopencl std::vector platforms(num_platforms); PYOPENCL_CALL_GUARDED(clGetPlatformIDs, - (num_platforms, platforms.empty( ) ? NULL : &platforms.front(), &num_platforms)); + (num_platforms, platforms.empty( ) ?
nullptr : &platforms.front(), &num_platforms)); py::list result; for (cl_platform_id pid: platforms) @@ -860,17 +860,17 @@ namespace pyopencl properties.push_back(0); cl_device_partition_property *props_ptr - = properties.empty( ) ? NULL : &properties.front(); + = properties.empty( ) ? nullptr : &properties.front(); cl_uint num_entries; PYOPENCL_CALL_GUARDED(clCreateSubDevices, - (m_device, props_ptr, 0, NULL, &num_entries)); + (m_device, props_ptr, 0, nullptr, &num_entries)); std::vector result; result.resize(num_entries); PYOPENCL_CALL_GUARDED(clCreateSubDevices, - (m_device, props_ptr, num_entries, &result.front(), NULL)); + (m_device, props_ptr, num_entries, &result.front(), nullptr)); py::list py_result; for (cl_device_id did: result) @@ -905,7 +905,7 @@ namespace pyopencl std::vector devices(num_devices); PYOPENCL_CALL_GUARDED(clGetDeviceIDs, (m_platform, devtype, - num_devices, devices.empty( ) ? NULL : &devices.front(), &num_devices)); + num_devices, devices.empty( ) ? nullptr : &devices.front(), &num_devices)); py::list result; for (cl_device_id did: devices) @@ -1093,7 +1093,7 @@ namespace pyopencl = parse_context_properties(py_properties); cl_context_properties *props_ptr - = props.empty( ) ? NULL : &props.front(); + = props.empty( ) ? nullptr : &props.front(); cl_int status_code; @@ -1107,14 +1107,14 @@ namespace pyopencl "one of 'devices' or 'dev_type' must be None"); std::vector devices; - PYTHON_FOREACH(py_dev, py_devices) + for (py::handle py_dev: py_devices) devices.push_back(py_dev.cast().data()); PYOPENCL_PRINT_CALL_TRACE("clCreateContext"); ctx = clCreateContext( props_ptr, devices.size(), - devices.empty( ) ? NULL : &devices.front(), + devices.empty( ) ? nullptr : &devices.front(), 0, 0, &status_code); } // from dev_type @@ -1486,7 +1486,7 @@ namespace pyopencl event_list[num_events++] = py_evt.cast().data(); PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( - cq.data(), num_events, event_list.empty( ) ? 
NULL : &event_list.front())); + cq.data(), num_events, event_list.empty( ) ? nullptr : &event_list.front())); } inline @@ -1653,7 +1653,7 @@ namespace pyopencl PYOPENCL_RETRY_IF_MEM_ERROR( PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, ( cq.data(), - mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), + mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), flags, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -1676,10 +1676,10 @@ namespace pyopencl // {{{ get platform cl_device_id dev; PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (cq.data(), CL_QUEUE_DEVICE, - sizeof(dev), &dev, NULL)); + sizeof(dev), &dev, nullptr)); cl_platform_id plat; PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_PLATFORM, - sizeof(plat), &plat, NULL)); + sizeof(plat), &plat, nullptr)); // }}} #endif @@ -1694,7 +1694,7 @@ namespace pyopencl PYOPENCL_RETRY_IF_MEM_ERROR( PYOPENCL_CALL_GUARDED(enqueue_migrate_fn, ( cq.data(), - mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), + mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), flags, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -2326,12 +2326,12 @@ namespace pyopencl cl_uint num_image_formats; PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( ctx.data(), flags, image_type, - 0, NULL, &num_image_formats)); + 0, nullptr, &num_image_formats)); std::vector formats(num_image_formats); PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( ctx.data(), flags, image_type, - formats.size(), formats.empty( ) ? NULL : &formats.front(), NULL)); + formats.size(), formats.empty( ) ? nullptr : &formats.front(), nullptr)); PYOPENCL_RETURN_VECTOR(cl_image_format, formats); } @@ -2940,9 +2940,9 @@ namespace pyopencl result = py::object(py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, shape.size(), - shape.empty() ? NULL : &shape.front(), - strides.empty() ? NULL : &strides.front(), - mapped, ary_flags, /*obj*/NULL))); + shape.empty() ? 
nullptr : &shape.front(), + strides.empty() ? nullptr : &strides.front(), + mapped, ary_flags, /*obj*/nullptr))); if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.ptr())) throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, @@ -3024,9 +3024,9 @@ namespace pyopencl py::object result = py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, shape.size(), - shape.empty() ? NULL : &shape.front(), - strides.empty() ? NULL : &strides.front(), - mapped, ary_flags, /*obj*/NULL)); + shape.empty() ? nullptr : &shape.front(), + strides.empty() ? nullptr : &strides.front(), + mapped, ary_flags, /*obj*/nullptr)); py::object map_py(handle_from_new_ptr(map.release())); PyArray_BASE(result.ptr()) = map_py.ptr(); @@ -3200,7 +3200,7 @@ namespace pyopencl PYOPENCL_CALL_GUARDED(clGetProgramInfo, (m_program, param_name, sizes.size()*sizeof(unsigned char *), - result_ptrs.empty( ) ? NULL : &result_ptrs.front(), 0)); \ + result_ptrs.empty( ) ? nullptr : &result_ptrs.front(), 0)); \ py::list py_result; ptr = result.get(); @@ -3311,8 +3311,8 @@ namespace pyopencl PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram, (m_program, num_devices, devices, options.c_str(), header_names.size(), - programs.empty() ? NULL : &programs.front(), - header_name_ptrs.empty() ? NULL : &header_name_ptrs.front(), + programs.empty() ? nullptr : &programs.front(), + header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(), 0, 0)); } #endif @@ -3397,10 +3397,10 @@ namespace pyopencl PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); cl_program result = clCreateProgramWithBinary( ctx.data(), num_devices, - devices.empty( ) ? NULL : &devices.front(), - sizes.empty( ) ? NULL : &sizes.front(), - binaries.empty( ) ? NULL : &binaries.front(), - binary_statuses.empty( ) ? NULL : &binary_statuses.front(), + devices.empty( ) ? nullptr : &devices.front(), + sizes.empty( ) ? nullptr : &sizes.front(), + binaries.empty( ) ? nullptr : &binaries.front(), + binary_statuses.empty( ) ? 
nullptr : &binary_statuses.front(), &status_code); if (status_code != CL_SUCCESS) throw pyopencl::error("clCreateProgramWithBinary", status_code); @@ -3479,7 +3479,7 @@ namespace pyopencl ctx.data(), num_devices, devices, options.c_str(), programs.size(), - programs.empty() ? NULL : &programs.front(), + programs.empty() ? nullptr : &programs.front(), 0, 0, &status_code); @@ -3764,7 +3764,7 @@ namespace pyopencl std::vector kernels(num_kernels); PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( pgm.data(), num_kernels, - kernels.empty( ) ? NULL : &kernels.front(), &num_kernels)); + kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels)); py::list result; for (cl_kernel knl: kernels) @@ -3810,7 +3810,7 @@ namespace pyopencl while (global_work_size.size() < work_dim) global_work_size.push_back(1); - local_work_size_ptr = local_work_size.empty( ) ? NULL : &local_work_size.front(); + local_work_size_ptr = local_work_size.empty( ) ? nullptr : &local_work_size.front(); } if (g_times_l && local_work_size_ptr) @@ -3835,7 +3835,7 @@ namespace pyopencl global_work_offset[work_axis] *= local_work_size[work_axis]; } - global_work_offset_ptr = global_work_offset.empty( ) ? NULL : &global_work_offset.front(); + global_work_offset_ptr = global_work_offset.empty( ) ? nullptr : &global_work_offset.front(); } PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { @@ -3845,7 +3845,7 @@ namespace pyopencl knl.data(), work_dim, global_work_offset_ptr, - global_work_size.empty( ) ? NULL : &global_work_size.front(), + global_work_size.empty( ) ? nullptr : &global_work_size.front(), local_work_size_ptr, PYOPENCL_WAITLIST_ARGS, &evt )); @@ -4021,7 +4021,7 @@ namespace pyopencl cl_event evt; \ PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ cq.data(), \ - mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), \ + mem_objects.size(), mem_objects.empty( ) ? 
nullptr : &mem_objects.front(), \ PYOPENCL_WAITLIST_ARGS, &evt \ )); \ \ @@ -4081,7 +4081,7 @@ namespace pyopencl "clGetGLContextInfoKHR extension function not present"); cl_context_properties *props_ptr - = props.empty( ) ? NULL : &props.front(); + = props.empty( ) ? nullptr : &props.front(); switch (param_name) { @@ -4106,7 +4106,7 @@ namespace pyopencl PYOPENCL_CALL_GUARDED(func_ptr, (props_ptr, param_name, size, - devices.empty( ) ? NULL : &devices.front(), &size)); + devices.empty( ) ? nullptr : &devices.front(), &size)); py::list result; for (cl_device_id did: devices) @@ -4265,8 +4265,8 @@ namespace pyopencl py::object result = py::reinterpret_steal(PyArray_NewFromDescr( &PyArray_Type, tp_descr, - dims.size(), &dims.front(), /*strides*/ NULL, - host_ptr, ary_flags, /*obj*/NULL)); + dims.size(), &dims.front(), /*strides*/ nullptr, + host_ptr, ary_flags, /*obj*/nullptr)); if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size) throw pyopencl::error("MemoryObject.get_host_array", -- GitLab From 3397e60dfdb380528c0edafdea2b903c3fc607e3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:21:16 -0500 Subject: [PATCH 44/92] Re-add SVM support --- pyopencl/__init__.py | 351 ++++++++++++++++++++++++++++++++++++----- src/wrap_cl.hpp | 303 ++++++++++++++++++++++++++++++++++- src/wrap_cl_part_2.cpp | 68 ++++++++ 3 files changed, 682 insertions(+), 40 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 3d2547e0..5b7f3544 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -110,7 +110,6 @@ from pyopencl._cl import ( # noqa Platform, get_platforms, - unload_platform_compiler, Device, Context, @@ -120,10 +119,6 @@ from pyopencl._cl import ( # noqa MemoryObject, MemoryMap, Buffer, - # FIXME - # SVMAllocation, - # SVM, - # SVMMap, _Program, Kernel, @@ -159,15 +154,8 @@ from pyopencl._cl import ( # noqa enqueue_fill_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, - # FIXME - # 
enqueue_svm_memfill, - # enqueue_svm_migratemem, have_gl, - # FIXME? - # _GLObject, - GLBuffer, - GLRenderBuffer, ImageFormat, get_supported_image_formats, @@ -175,40 +163,50 @@ from pyopencl._cl import ( # noqa ImageDescriptor, Image, Sampler, - GLTexture, DeviceTopologyAmd, ) -import inspect as _inspect +if get_cl_header_version() >= (1, 2): + from pyopencl._cl import ( # noqa + unload_platform_compiler, + ) -CONSTANT_CLASSES = tuple( - getattr(_cl, name) for name in dir(_cl) - if _inspect.isclass(getattr(_cl, name)) - and name[0].islower() and name not in ["zip", "map", "range"]) +if get_cl_header_version() >= (2, 0): + from pyopencl._cl import ( # noqa + SVMAllocation, + SVM, -_KERNEL_ARG_CLASSES = ( - MemoryObjectHolder, - Sampler, - LocalMemory, # FIXME - # SVM, + #enqueue_svm_migratemem, ) - if _cl.have_gl(): + from pyopencl._cl import ( # noqa + GLBuffer, + GLRenderBuffer, + GLTexture, + ) + try: from pyopencl._cl import get_apple_cgl_share_group # noqa except ImportError: pass try: - from pyopencl.cffi_cl import ( # noqa + from pyopencl._cl import ( # noqa enqueue_acquire_gl_objects, enqueue_release_gl_objects, ) except ImportError: pass +import inspect as _inspect + +CONSTANT_CLASSES = tuple( + getattr(_cl, name) for name in dir(_cl) + if _inspect.isclass(getattr(_cl, name)) + and name[0].islower() and name not in ["zip", "map", "range"]) + # {{{ diagnostics @@ -583,24 +581,24 @@ def link_program(context, programs, options=[], devices=None): # {{{ monkeypatch C++ wrappers to add functionality def _add_functionality(): - # {{{ Platform - - def platform_repr(self): - return "" % (self.name, self.int_ptr) - - def platform_get_cl_version(self): + def generic_get_cl_version(self): import re version_string = self.version match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) if match is None: - raise RuntimeError("platform %s returned non-conformant " + raise RuntimeError("%s %s returned non-conformant " "platform version string '%s'" % - (self, 
version_string)) + (type(self).__name__, self, version_string)) return int(match.group(1)), int(match.group(2)) + # {{{ Platform + + def platform_repr(self): + return "" % (self.name, self.int_ptr) + Platform.__repr__ = platform_repr - Platform._get_cl_version = platform_get_cl_version + Platform._get_cl_version = generic_get_cl_version # }}} @@ -616,6 +614,7 @@ def _add_functionality(): Device.__repr__ = device_repr # undocumented for now: + Device._get_cl_version = generic_get_cl_version Device.persistent_unique_id = property(device_persistent_unique_id) # }}} @@ -1045,6 +1044,187 @@ def _add_functionality(): # }}} + # {{{ SVMAllocation + + SVMAllocation.__doc__ = """An object whose lifetime is tied to an allocation of shared virtual memory. + + .. note:: + + Most likely, you will not want to use this directly, but rather + :func:`svm_empty` and related functions which allow access to this + functionality using a friendlier, more Pythonic interface. + + .. versionadded:: 2016.2 + + .. automethod:: __init__(self, ctx, size, alignment, flags=None) + .. automethod:: release + .. automethod:: enqueue_release + """ + + svmallocation_old_init = SVMAllocation.__init__ + + def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. + """ + svmallocation_old_init(self, ctx, size, alignment, flags) + + read_write = ( + flags & mem_flags.WRITE_ONLY != 0 + or flags & mem_flags.READ_WRITE != 0) + + _interface["data"] = ( + int(self._ptr_as_int()), not read_write) + + self.__array_interface__ = _interface + + SVMAllocation.__init__ = svmallocation_init + # FIXME + # SVMAllocation.enqueue_release.__doc__ = """ + # :returns: a :class:`pyopencl.Event` + + # |std-enqueue-blurb| + # """ + + # }}} + + # {{{ SVM + + SVM.__doc__ = """Tags an object exhibiting the Python buffer interface (such as a + :class:`numpy.ndarray`) as referring to shared virtual memory. 
+ + Depending on the features of the OpenCL implementation, the following + types of objects may be passed to/wrapped in this type: + + * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` + for any implementation of OpenCL 2.0. + + This is how coarse-grain SVM may be used from both host and device:: + + svm_ary = cl.SVM(cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) + assert isinstance(svm_ary.mem, np.ndarray) + + with svm_ary.map_rw(queue) as ary: + ary.fill(17) # use from host + + prg.twice(queue, svm_ary.mem.shape, None, svm_ary) + + * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, + if the implementation supports fine-grained shared virtual memory. + This memory may directly be passed to a kernel:: + + ary = cl.fsvm_empty(ctx, 1000, np.float32) + assert isinstance(ary, np.ndarray) + + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host + + Observe how mapping (as needed in coarse-grain SVM) is no longer + necessary. + + * any :class:`numpy.ndarray` (or other Python object with a buffer + interface) if the implementation supports fine-grained *system* shared + virtual memory. + + This is how plain :mod:`numpy` arrays may directly be passed to a + kernel:: + + ary = np.zeros(1000, np.float32) + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host + + Objects of this type may be passed to kernel calls and :func:`enqueue_copy`. + Coarse-grain shared-memory *must* be mapped into host address space using + :meth:`map` before being accessed through the :mod:`numpy` interface. + + .. note:: + + This object merely serves as a 'tag' that changes the behavior + of functions to which it is passed. It has no special management + relationship to the memory it tags. For example, it is permissible + to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one + :class:`SVM` instance and use the array to construct another. 
+ Neither of the tags needs to be kept alive. + + .. versionadded:: 2016.2 + + .. attribute:: mem + + The wrapped object. + + .. automethod:: __init__ + .. automethod:: map + .. automethod:: map_ro + .. automethod:: map_rw + .. automethod:: as_buffer + """ + + svm_old_init = SVM.__init__ + + def svm_init(self, mem): + svm_old_init(self, mem) + + self.mem = mem + + def svm_map(self, queue, flags, is_blocking=True, wait_for=None): + """ + :arg is_blocking: If *False*, subsequent code must wait on + :attr:`SVMMap.event` in the returned object before accessing the + mapped memory. + :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to + read-write. + :returns: an :class:`SVMMap` instance + + |std-enqueue-blurb| + """ + return SVMMap( + self, + queue, + _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for)) + + def svm_map_ro(self, queue, is_blocking=True, wait_for=None): + """Like :meth:`map`, but with *flags* set for a read-only map.""" + + return self.map(queue, map_flags.READ, + is_blocking=is_blocking, wait_for=wait_for) + + def svm_map_rw(self, queue, is_blocking=True, wait_for=None): + """Like :meth:`map`, but with *flags* set for a read-write map.""" + + return self.map(queue, map_flags.READ | map_flags.WRITE, + is_blocking=is_blocking, wait_for=wait_for) + + def svm__enqueue_unmap(self, queue, wait_for=None): + return _cl._enqueue_svm_unmap(queue, self, wait_for) + + def svm_as_buffer(self, ctx, flags=None): + """ + :arg ctx: a :class:`Context` + :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to + read-write. + :returns: a :class:`Buffer` corresponding to *self*. + + The memory referred to by this object must not be freed before + the returned :class:`Buffer` is released.
+ """ + + if flags is None: + flags = mem_flags.READ_WRITE + + return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) + + SVM.__init__ = svm_init + SVM.map = svm_map + SVM.map_ro = svm_map_ro + SVM.map_rw = svm_map_rw + SVM._enqueue_unmap = svm__enqueue_unmap + SVM.as_buffer = svm_as_buffer + + # }}} + # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs # to be visible through the attributes. So get_info attr creation needs to happen # after the overriding is complete. @@ -1266,6 +1446,53 @@ _csc = create_some_context # }}} +# {{{ SVMMap + +class SVMMap(object): + """ + .. attribute:: event + + .. versionadded:: 2016.2 + + .. automethod:: release + + This class may also be used as a context manager in a ``with`` statement. + :meth:`release` will be called upon exit from the ``with`` region. + The value returned to the ``as`` part of the context manager is the + mapped Python object (e.g. a :mod:`numpy` array). + """ + def __init__(self, svm, queue, event): + self.svm = svm + self.queue = queue + self.event = event + + def __del__(self): + if self.svm is not None: + self.release() + + def __enter__(self): + return self.svm.mem + + def __exit__(self, exc_type, exc_val, exc_tb): + self.release() + + def release(self, queue=None, wait_for=None): + """ + :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one + with which the map was created, if not specified. 
+ :returns: a :class:`pyopencl.Event` + + |std-enqueue-blurb| + """ + + evt = self.svm._enqueue_unmap(self.queue) + self.svm = None + + return evt + +# }}} + + # {{{ enqueue_copy def enqueue_copy(queue, dest, src, **kwargs): @@ -1442,14 +1669,13 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - # FIXME - # elif isinstance(dest, SVM): - elif 0: + elif isinstance(dest, SVM): # to SVM if isinstance(src, SVM): src = src.mem return _cl._enqueue_svm_memcpy(queue, dest.mem, src, **kwargs) + else: # assume to-host @@ -1599,6 +1825,48 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): # {{{ numpy-like svm allocation +def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): + """Fill shared virtual memory with a pattern. + + :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object + :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`) with the + fill pattern to be used. + :arg byte_count: The size of the memory to be filled. Defaults to the + entirety of *dest*. + + |std-enqueue-blurb| + + .. versionadded:: 2016.2 + """ + + if not isinstance(dest, SVM): + dest = SVM(dest) + + return _cl._enqueue_svm_memfill( + queue, dest, pattern, byte_count=None, wait_for=None) + + +def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): + """ + :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` + arrays), optionally wrapped in :class:`SVM` objects. + :arg flags: a combination of :class:`mem_migration_flags` + + |std-enqueue-blurb| + + .. versionadded:: 2016.2 + + This function requires OpenCL 2.1. + """ + + return _cl._enqueue_svm_migratemem( + queue, + [svm.mem if isinstance(svm, SVM) else svm + for svm in svms], + flags, + wait_for) + + def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* and *order*.
(See :func:`numpy.empty` for the meaning of these arguments.) @@ -1746,4 +2014,13 @@ def fsvm_empty_like(ctx, ary, alignment=None): # }}} + +_KERNEL_ARG_CLASSES = ( + MemoryObjectHolder, + Sampler, + LocalMemory, + SVM, + ) + + # vim: foldmethod=marker diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8b764bac..36881b9f 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3041,6 +3041,289 @@ namespace pyopencl // }}} + // {{{ svm + +#if PYOPENCL_CL_VERSION >= 0x2000 + + class svm_arg_wrapper + { + private: + void *m_ptr; + PYOPENCL_BUFFER_SIZE_T m_size; +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward; +#endif + + public: + svm_arg_wrapper(py::object holder) + { +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + ward = std::unique_ptr(new py_buffer_wrapper); + ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + m_ptr = ward->m_buf.buf; + m_size = ward->m_buf.len; +#else + py::object ward = buffer; + if (PyObject_AsWriteBuffer(buffer.ptr(), &m_ptr, &m_size)) + throw py::error_already_set(); +#endif + } + + void *ptr() const + { + return m_ptr; + } + size_t size() const + { + return m_size; + } + }; + + + class svm_allocation : noncopyable + { + private: + std::shared_ptr m_context; + void *m_allocation; + + public: + svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) + : m_context(ctx) + { + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); + m_allocation = clSVMAlloc( + ctx->data(), + flags, size, alignment); + + if (!m_allocation) + throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); + } + + ~svm_allocation() + { + if (m_allocation) + release(); + } + + void release() + { + if (!m_allocation) + throw error("SVMAllocation.release", CL_INVALID_VALUE, + "trying to double-unref svm allocation"); + + clSVMFree(m_context->data(), m_allocation); + m_allocation = nullptr; + } + + void enqueue_release(command_queue &queue, py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (!m_allocation) + 
throw error("SVMAllocation.release", CL_INVALID_VALUE, + "trying to double-unref svm allocation"); + + cl_event evt; + + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + queue.data(), 1, &m_allocation, + nullptr, nullptr, + PYOPENCL_WAITLIST_ARGS, &evt)); + + m_allocation = nullptr; + } + + void *ptr() const + { + return m_allocation; + } + + intptr_t ptr_as_int() const + { + return (intptr_t) m_allocation; + } + + bool operator==(svm_allocation const &other) const + { + return m_allocation == other.m_allocation; + } + + bool operator!=(svm_allocation const &other) const + { + return m_allocation != other.m_allocation; + } + }; + + + inline + event *enqueue_svm_memcpy( + command_queue &cq, + cl_bool is_blocking, + svm_arg_wrapper &dst, svm_arg_wrapper &src, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (src.size() != dst.size()) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "sizes of source and destination buffer do not match"); + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMemcpy, + ( + cq.data(), + is_blocking, + dst.ptr(), src.ptr(), + dst.size(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_memfill( + command_queue &cq, + svm_arg_wrapper &dst, py::object py_pattern, + py::object byte_count, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + void *pattern_ptr; + PYOPENCL_BUFFER_SIZE_T pattern_len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr pattern_ward(new py_buffer_wrapper); + + pattern_ward->get(py_pattern.ptr(), PyBUF_ANY_CONTIGUOUS); + + pattern_ptr = pattern_ward->m_buf.buf; + pattern_len = pattern_ward->m_buf.len; +#else + py::object pattern_ward = py_pattern; + if (PyObject_AsReadBuffer(buffer.ptr(), &pattern_buf, &pattern_len)) + throw py::error_already_set(); +#endif + + size_t fill_size = dst.size(); + if (!byte_count.is_none()) + fill_size = py::cast(byte_count); + + cl_event evt; + PYOPENCL_CALL_GUARDED( + 
clEnqueueSVMMemFill, + ( + cq.data(), + dst.ptr(), pattern_ptr, + pattern_len, + fill_size, + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_map( + command_queue &cq, + cl_bool is_blocking, + cl_map_flags flags, + svm_arg_wrapper &svm, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMap, + ( + cq.data(), + is_blocking, + flags, + svm.ptr(), svm.size(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_unmap( + command_queue &cq, + svm_arg_wrapper &svm, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMUnmap, + ( + cq.data(), + svm.ptr(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + +#if PYOPENCL_CL_VERSION >= 0x2010 + inline + event *enqueue_svm_migratemem( + command_queue &cq, + py::sequence svms, + cl_mem_migration_flags flags, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + std::vector svm_pointers; + std::vector sizes; + + for (py::handle py_svm: svms) + { + svm_arg_wrapper &svm(py::cast(py_svm)); + + svm_pointers.push_back(svm.ptr()); + sizes.push_back(svm.size()); + } + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMigrateMem, + ( + cq.data(), + svm_pointers.size(), + svm_pointers.empty() ? nullptr : &svm_pointers.front(), + sizes.empty() ? 
nullptr : &sizes.front(), + flags, + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + // {{{ sampler class sampler : noncopyable @@ -3626,6 +3909,14 @@ namespace pyopencl (m_kernel, arg_index, len, buf)); } +#if PYOPENCL_CL_VERSION >= 0x2000 + void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) + { + PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, + (m_kernel, arg_index, wrp.ptr())); + } +#endif + void set_arg(cl_uint arg_index, py::object arg) { if (arg.ptr() == Py_None) @@ -3641,6 +3932,15 @@ namespace pyopencl } catch (py::cast_error &) { } +#if PYOPENCL_CL_VERSION >= 0x2000 + try + { + set_arg_svm(arg_index, arg.cast()); + return; + } + catch (py::cast_error &) { } +#endif + try { set_arg_local(arg_index, arg.cast()); @@ -4282,9 +4582,6 @@ namespace pyopencl // }}} } - - - #endif // vim: foldmethod=marker diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 1c62f2b2..acf600c7 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -230,6 +230,71 @@ void pyopencl_expose_part_2(py::module &m) // }}} + // {{{ svm + +#if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef svm_arg_wrapper cls; + py::class_(m, "SVM", py::dynamic_attr()) + .def(py::init()) + ; + } + + { + typedef svm_allocation cls; + py::class_(m, "SVMAllocation", py::dynamic_attr()) + .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) + .DEF_SIMPLE_METHOD(release) + .DEF_SIMPLE_METHOD(enqueue_release) + .def("_ptr_as_int", &cls::ptr_as_int) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::ptr_as_int) + ; + } + + m.def("_enqueue_svm_memcpyw", enqueue_svm_memcpy, + py::arg("queue"), + py::arg("is_blocking"), + py::arg("dst"), + py::arg("src"), + py::arg("wait_for")=py::none() + ); + + m.def("_enqueue_svm_memfill", enqueue_svm_memfill, + py::arg("queue"), + py::arg("dst"), + py::arg("pattern"), + py::arg("byte_count")=py::none(), + py::arg("wait_for")=py::none() + ); + + 
m.def("_enqueue_svm_map", enqueue_svm_map, + py::arg("queue"), + py::arg("is_blocking"), + py::arg("flags"), + py::arg("svm"), + py::arg("wait_for")=py::none() + ); + + m.def("_enqueue_svm_unmap", enqueue_svm_unmap, + py::arg("queue"), + py::arg("svm"), + py::arg("wait_for")=py::none() + ); +#endif + +#if PYOPENCL_CL_VERSION >= 0x2010 + m.def("_enqueue_svm_migrate_mem", enqueue_svm_migratemem, + py::arg("queue"), + py::arg("svms"), + py::arg("flags")=py::none(), + py::arg("wait_for")=py::none() + ); +#endif + + // }}} + // {{{ sampler { typedef sampler cls; @@ -323,6 +388,9 @@ void pyopencl_expose_part_2(py::module &m) .DEF_SIMPLE_METHOD(get_work_group_info) .def("_set_arg_null", &cls::set_arg_null) .def("_set_arg_buf", &cls::set_arg_buf) +#if PYOPENCL_CL_VERSION >= 0x1020 + .def("_set_arg_svm", &cls::set_arg_svm) +#endif .DEF_SIMPLE_METHOD(set_arg) #if PYOPENCL_CL_VERSION >= 0x1020 .DEF_SIMPLE_METHOD(get_arg_info) -- GitLab From 6fe7f4a6fe56819c66155c1764ed6367d5a286e7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:34:47 -0500 Subject: [PATCH 45/92] Stop including pybind11 as a submodule --- .gitlab-ci.yml | 28 ++++++++--------- .gitmodules | 3 -- pybind11 | 1 - setup.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 19 deletions(-) delete mode 160000 pybind11 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0047755f..1687d168 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,7 +2,7 @@ script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=amd:pu - - export EXTRA_INSTALL="numpy mako scipy pyfmmlib" + - export EXTRA_INSTALL="pybind11 numpy mako scipy pyfmmlib" - echo "CL_ENABLE_GL = True" > siteconf.py - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" @@ -18,7 +18,7 @@ Python 3.6 Intel CPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST="intel(r):pu" - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -32,7 +32,7 @@ Python 3.6 AMD CPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=amd:pu - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -46,7 +46,7 @@ Python 3.6 Titan X: script: - export PY_EXE=python3.5 - export PYOPENCL_TEST=nvi:titan - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -59,7 +59,7 @@ Python 3.6 K40: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=nvi:k40 - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -72,7 +72,7 @@ Python 3.6 AMD GPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=amd:fiji - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" allow_failure: true @@ -86,7 +86,7 @@ Python 2.6 POCL CPU: script: - export PY_EXE=python2.6 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -100,7 +100,7 @@ Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -113,7 +113,7 @@ Python 3.7 POCL: script: - export PY_EXE=python3.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -126,7 +126,7 @@ Python 3.6 POCL CL 1.1: script: - export PY_EXE=python3.5 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - echo "CL_PRETEND_VERSION = '1.1'" > siteconf.py - ". ./build-and-test-py-project.sh" @@ -140,7 +140,7 @@ Python 3.6 POCL: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" tags: @@ -153,7 +153,7 @@ Python 2.7 Apple: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=app:cpu - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - export PKG_CONFIG_PATH=/usr/local/opt/libffi/lib/pkgconfig - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -178,7 +178,7 @@ PyPy POCL: script: - export PY_EXE=pypy - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -190,7 +190,7 @@ PyPy POCL: Documentation: script: - - EXTRA_INSTALL="numpy mako" + - EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". ./build-docs.sh" tags: diff --git a/.gitmodules b/.gitmodules index b675a6cc..779ec487 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "pyopencl/compyte"] path = pyopencl/compyte url = https://github.com/inducer/compyte -[submodule "pybind11"] - path = pybind11 - url = https://github.com/pybind/pybind11.git diff --git a/pybind11 b/pybind11 deleted file mode 160000 index f7bc18f5..00000000 --- a/pybind11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f7bc18f528bb35cd06c93d0a58c17e6eea3fa68c diff --git a/setup.py b/setup.py index 0e6aaf80..092ac928 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,82 @@ THE SOFTWARE. 
import sys from os.path import exists +import setuptools +from setuptools.command.build_ext import build_ext + + +# {{{ boilerplate from https://github.com/pybind/python_example/blob/2ed5a68759cd6ff5d2e5992a91f08616ef457b5c/setup.py # noqa + +class get_pybind_include(object): # noqa: N801 + """Helper class to determine the pybind11 include path + + The purpose of this class is to postpone importing pybind11 + until it is actually installed, so that the ``get_include()`` + method can be invoked. """ + + def __init__(self, user=False): + self.user = user + + def __str__(self): + import pybind11 + return pybind11.get_include(self.user) + + +# As of Python 3.6, CCompiler has a `has_flag` method. +# cf http://bugs.python.org/issue26689 +def has_flag(compiler, flagname): + """Return a boolean indicating whether a flag name is supported on + the specified compiler. + """ + import tempfile + with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: + f.write('int main (int argc, char **argv) { return 0; }') + try: + compiler.compile([f.name], extra_postargs=[flagname]) + except setuptools.distutils.errors.CompileError: + return False + return True + + +def cpp_flag(compiler): + """Return the -std=c++[11/14] compiler flag. + + The c++14 is prefered over c++11 (when it is available). 
+ """ + if has_flag(compiler, '-std=c++14'): + return '-std=c++14' + elif has_flag(compiler, '-std=c++11'): + return '-std=c++11' + else: + raise RuntimeError('Unsupported compiler -- at least C++11 support ' + 'is needed!') + + +class BuildExt(build_ext): + """A custom build extension for adding compiler-specific options.""" + c_opts = { + 'msvc': ['/EHsc'], + 'unix': [], + } + + if sys.platform == 'darwin': + c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + + def build_extensions(self): + ct = self.compiler.compiler_type + opts = self.c_opts.get(ct, []) + if ct == 'unix': + opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) + opts.append(cpp_flag(self.compiler)) + if has_flag(self.compiler, '-fvisibility=hidden'): + opts.append('-fvisibility=hidden') + elif ct == 'msvc': + opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) + for ext in self.extensions: + ext.extra_compile_args = opts + build_ext.build_extensions(self) + +# }}} def get_config_schema(): @@ -230,12 +306,16 @@ def main(): "src/wrap_mempool.cpp", "src/bitlog.cpp", ], - include_dirs=INCLUDE_DIRS, + include_dirs=INCLUDE_DIRS + [ + get_pybind_include(), + get_pybind_include(user=True) + ], library_dirs=conf["CL_LIB_DIR"], libraries=conf["CL_LIBNAME"], define_macros=list(conf["EXTRA_DEFINES"].items()), extra_compile_args=conf["CXXFLAGS"], extra_link_args=conf["LDFLAGS"], + language='c++', ), ], @@ -263,8 +343,11 @@ def main(): ] }, + cmdclass={'build_ext': BuildExt}, zip_safe=False) if __name__ == '__main__': main() + +# vim: foldmethod=marker -- GitLab From 524269300eea32bd23356b2e2c67528cbcd2d31a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:40:23 -0500 Subject: [PATCH 46/92] Make more GL imports conditional on have_gl --- pyopencl/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 5b7f3544..f8e248e9 100644 --- a/pyopencl/__init__.py 
+++ b/pyopencl/__init__.py @@ -103,8 +103,6 @@ from pyopencl._cl import ( # noqa mem_migration_flags, device_partition_property, device_affinity_domain, - gl_object_type, - gl_texture_info, Error, MemoryError, LogicError, RuntimeError, @@ -182,6 +180,9 @@ if get_cl_header_version() >= (2, 0): if _cl.have_gl(): from pyopencl._cl import ( # noqa + gl_object_type, + gl_texture_info, + GLBuffer, GLRenderBuffer, GLTexture, -- GitLab From 4aa53e67407013d69b6bf1f92c9126e3093bb1d5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:55:19 -0500 Subject: [PATCH 47/92] [pybind11] Build/run with pretend-CL1.1 --- pyopencl/__init__.py | 183 +++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 84 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index f8e248e9..874cadac 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -124,18 +124,13 @@ from pyopencl._cl import ( # noqa Event, wait_for_events, NannyEvent, - UserEvent, enqueue_nd_range_kernel, - _enqueue_marker_with_wait_list, _enqueue_marker, - _enqueue_barrier_with_wait_list, - enqueue_migrate_mem_objects, enqueue_migrate_mem_object_ext, - _enqueue_barrier_with_wait_list, _enqueue_read_buffer, _enqueue_write_buffer, _enqueue_copy_buffer, @@ -144,12 +139,10 @@ from pyopencl._cl import ( # noqa _enqueue_copy_buffer_rect, enqueue_map_buffer, - _enqueue_fill_buffer, _enqueue_read_image, _enqueue_copy_image, _enqueue_write_image, enqueue_map_image, - enqueue_fill_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, @@ -158,15 +151,28 @@ from pyopencl._cl import ( # noqa ImageFormat, get_supported_image_formats, - ImageDescriptor, Image, Sampler, DeviceTopologyAmd, ) +if get_cl_header_version() >= (1, 1): + from pyopencl._cl import ( # noqa + UserEvent, + ) if get_cl_header_version() >= (1, 2): from pyopencl._cl import ( # noqa + _enqueue_marker_with_wait_list, + _enqueue_barrier_with_wait_list, + unload_platform_compiler, + + + 
enqueue_migrate_mem_objects, + _enqueue_fill_buffer, + enqueue_fill_image, + + ImageDescriptor, ) if get_cl_header_version() >= (2, 0): @@ -1047,22 +1053,24 @@ def _add_functionality(): # {{{ SVMAllocation - SVMAllocation.__doc__ = """An object whose lifetime is tied to an allocation of shared virtual memory. + if get_cl_header_version() >= (2, 0): + SVMAllocation.__doc__ = """An object whose lifetime is tied to an allocation of shared virtual memory. - .. note:: + .. note:: - Most likely, you will not want to use this directly, but rather - :func:`svm_empty` and related functions which allow access to this - functionality using a friendlier, more Pythonic interface. + Most likely, you will not want to use this directly, but rather + :func:`svm_empty` and related functions which allow access to this + functionality using a friendlier, more Pythonic interface. - .. versionadded:: 2016.2 + .. versionadded:: 2016.2 - .. automethod:: __init__(self, ctx, size, alignment, flags=None) - .. automethod:: release - .. automethod:: enqueue_release - """ + .. automethod:: __init__(self, ctx, size, alignment, flags=None) + .. automethod:: release + .. 
automethod:: enqueue_release + """ - svmallocation_old_init = SVMAllocation.__init__ + if get_cl_header_version() >= (2, 0): + svmallocation_old_init = SVMAllocation.__init__ def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): """ @@ -1080,90 +1088,95 @@ def _add_functionality(): self.__array_interface__ = _interface - SVMAllocation.__init__ = svmallocation_init - # FIXME - # SVMAllocation.enqueue_release.__doc__ = """ - # :returns: a :class:`pyopencl.Event` + if get_cl_header_version() >= (2, 0): + SVMAllocation.__init__ = svmallocation_init + # FIXME + # SVMAllocation.enqueue_release.__doc__ = """ + # :returns: a :class:`pyopencl.Event` - # |std-enqueue-blurb| - # """ + # |std-enqueue-blurb| + # """ # }}} # {{{ SVM - SVM.__doc__ = """Tags an object exhibiting the Python buffer interface (such as a - :class:`numpy.ndarray`) as referring to shared virtual memory. + if get_cl_header_version() >= (2, 0): + SVM.__doc__ = """Tags an object exhibiting the Python buffer interface (such as a + :class:`numpy.ndarray`) as referring to shared virtual memory. - Depending on the features of the OpenCL implementation, the following - types of objects may be passed to/wrapped in this type: + Depending on the features of the OpenCL implementation, the following + types of objects may be passed to/wrapped in this type: - * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` - for any implementation of OpenCL 2.0. + * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` + for any implementation of OpenCL 2.0. 
- This is how coarse-grain SVM may be used from both host and device:: + This is how coarse-grain SVM may be used from both host and device:: - svm_ary = cl.SVM(cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) - assert isinstance(svm_ary.mem, np.ndarray) + svm_ary = cl.SVM( + cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) + assert isinstance(svm_ary.mem, np.ndarray) - with svm_ary.map_rw(queue) as ary: - ary.fill(17) # use from host + with svm_ary.map_rw(queue) as ary: + ary.fill(17) # use from host - prg.twice(queue, svm_ary.mem.shape, None, svm_ary) + prg.twice(queue, svm_ary.mem.shape, None, svm_ary) - * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, - if the implementation supports fine-grained shared virtual memory. - This memory may directly be passed to a kernel:: + * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, + if the implementation supports fine-grained shared virtual memory. + This memory may directly be passed to a kernel:: - ary = cl.fsvm_empty(ctx, 1000, np.float32) - assert isinstance(ary, np.ndarray) + ary = cl.fsvm_empty(ctx, 1000, np.float32) + assert isinstance(ary, np.ndarray) - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host - Observe how mapping (as needed in coarse-grain SVM) is no longer - necessary. + Observe how mapping (as needed in coarse-grain SVM) is no longer + necessary. - * any :class:`numpy.ndarray` (or other Python object with a buffer - interface) if the implementation supports fine-grained *system* shared - virtual memory. + * any :class:`numpy.ndarray` (or other Python object with a buffer + interface) if the implementation supports fine-grained *system* + shared virtual memory. 
- This is how plain :mod:`numpy` arrays may directly be passed to a - kernel:: + This is how plain :mod:`numpy` arrays may directly be passed to a + kernel:: - ary = np.zeros(1000, np.float32) - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host + ary = np.zeros(1000, np.float32) + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host - Objects of this type may be passed to kernel calls and :func:`enqueue_copy`. - Coarse-grain shared-memory *must* be mapped into host address space using - :meth:`map` before being accessed through the :mod:`numpy` interface. + Objects of this type may be passed to kernel calls and + :func:`enqueue_copy`. Coarse-grain shared-memory *must* be mapped + into host address space using :meth:`map` before being accessed + through the :mod:`numpy` interface. - .. note:: + .. note:: - This object merely serves as a 'tag' that changes the behavior - of functions to which it is passed. It has no special management - relationship to the memory it tags. For example, it is permissible - to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one - :class:`SVM` instance and use the array to construct another. - Neither of the tags need to be kept alive. + This object merely serves as a 'tag' that changes the behavior + of functions to which it is passed. It has no special management + relationship to the memory it tags. For example, it is permissible + to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one + :class:`SVM` instance and use the array to construct another. + Neither of the tags need to be kept alive. - .. versionadded:: 2016.2 + .. versionadded:: 2016.2 - .. attribute:: mem + .. attribute:: mem - The wrapped object. + The wrapped object. - .. automethod:: __init__ - .. automethod:: map - .. automethod:: map_ro - .. automethod:: map_rw - .. automethod:: as_buffer - """ + .. automethod:: __init__ + .. 
automethod:: map + .. automethod:: map_ro + .. automethod:: map_rw + .. automethod:: as_buffer + """ - svm_old_init = SVM.__init__ + if get_cl_header_version() >= (2, 0): + svm_old_init = SVM.__init__ def svm_init(self, mem): svm_old_init(self, mem) @@ -1217,12 +1230,13 @@ def _add_functionality(): return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) - SVM.__init__ = svm_init - SVM.map = svm_map - SVM.map_ro = svm_map_ro - SVM.map_rw = svm_map_rw - SVM._enqueue_unmap = svm__enqueue_unmap - SVM.as_buffer = svm_as_buffer + if get_cl_header_version() >= (2, 0): + SVM.__init__ = svm_init + SVM.map = svm_map + SVM.map_ro = svm_map_ro + SVM.map_rw = svm_map_rw + SVM._enqueue_unmap = svm__enqueue_unmap + SVM.as_buffer = svm_as_buffer # }}} @@ -1670,7 +1684,7 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif isinstance(dest, SVM): + elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVM): # to SVM if isinstance(src, SVM): src = src.mem @@ -2020,8 +2034,9 @@ _KERNEL_ARG_CLASSES = ( MemoryObjectHolder, Sampler, LocalMemory, - SVM, ) +if get_cl_header_version() >= (2, 0): + _KERNEL_ARG_CLASSES = _KERNEL_ARG_CLASSES + (SVM,) # vim: foldmethod=marker -- GitLab From c54e7c7ba636ccecdcce0e7a6106b72a253a8425 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 21:59:36 -0500 Subject: [PATCH 48/92] [pybind11] Fix version ifdef for Kernel._set_arg_svm --- src/wrap_cl_part_2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index acf600c7..ac1533e2 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -388,7 +388,7 @@ void pyopencl_expose_part_2(py::module &m) .DEF_SIMPLE_METHOD(get_work_group_info) .def("_set_arg_null", &cls::set_arg_null) .def("_set_arg_buf", &cls::set_arg_buf) -#if PYOPENCL_CL_VERSION >= 0x1020 +#if PYOPENCL_CL_VERSION >= 0x2000 .def("_set_arg_svm", &cls::set_arg_svm) #endif 
.DEF_SIMPLE_METHOD(set_arg) -- GitLab From 543aa636f3ad03ada16073a34d2d5f11dd906de5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 22:03:06 -0500 Subject: [PATCH 49/92] AMD GPU CI: Use extra special OCL_ICD_VENDORS --- .gitlab-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1687d168..82b0f349 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -73,6 +73,10 @@ Python 3.6 AMD GPU: - export PY_EXE=python3.6 - export PYOPENCL_TEST=amd:fiji - export EXTRA_INSTALL="pybind11 numpy mako" + + # https://andreask.cs.illinois.edu/MachineShop/UserNotes + - export OCL_ICD_VENDORS=/etc/OpenCLwithAMD/vendors + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true -- GitLab From 30af0611695e3301a5ee881f37492d2d2b3f80e5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 22:04:15 -0500 Subject: [PATCH 50/92] Add pybind11 as a dep for the Conda CI --- .test-conda-env-py3.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index a149bbe4..b7824b0b 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -9,3 +9,4 @@ dependencies: - pocl - osx-pocl-opencl - mako +- pybind11 -- GitLab From 68757a4535dc2225e94fd96a033378c4b244bde8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 11 Aug 2018 23:58:25 -0500 Subject: [PATCH 51/92] [pybind11] Implement Event.set_callback --- src/wrap_cl.hpp | 103 +++++++++++++++++++++++++++++++++++++++++ src/wrap_cl_part_1.cpp | 3 ++ test/test_wrapper.py | 2 +- 3 files changed, 107 insertions(+), 1 deletion(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 36881b9f..6c004171 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -46,6 +46,10 @@ #endif +#include +#include +#include + #include #include #include @@ -1345,6 +1349,105 @@ namespace pyopencl { 
PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); } + +#if PYOPENCL_CL_VERSION >= 0x1010 + // {{{ set_callback, by way of a a thread-based construction + + private: + struct event_callback_info_t + { + std::mutex m_mutex; + std::condition_variable m_condvar; + + py::object m_py_event; + py::object m_py_callback; + + bool m_set_callback_suceeded; + + cl_event m_event; + cl_int m_command_exec_status; + + event_callback_info_t(py::object py_event, py::object py_callback) + : m_set_callback_suceeded(true), m_py_event(py_event), m_py_callback(py_callback) + {} + }; + + static void evt_callback(cl_event evt, cl_int command_exec_status, void *user_data) + { + event_callback_info_t *cb_info = reinterpret_cast(user_data); + { + std::lock_guard lg(cb_info->m_mutex); + cb_info->m_event = evt; + cb_info->m_command_exec_status = command_exec_status; + } + cb_info->m_condvar.notify_one(); + } + + public: + void set_callback(cl_int command_exec_callback_type, py::object pfn_event_notify) + { + // The reason for doing this via a thread is that we're able to wait on + // acquiring the GIL. (which we can't in the callback) + + std::unique_ptr cb_info_holder( + new event_callback_info_t( + handle_from_new_ptr(new event(*this)), + pfn_event_notify)); + event_callback_info_t *cb_info = cb_info_holder.get(); + + std::thread notif_thread([cb_info]() + { + std::unique_lock ulk(cb_info->m_mutex); + cb_info->m_condvar.wait(ulk); + + { + py::gil_scoped_acquire acquire; + + if (cb_info->m_set_callback_suceeded) + { + try { + cb_info->m_py_callback( + // cb_info->m_py_event, + cb_info->m_command_exec_status); + } + catch (std::exception &exc) + { + std::cerr + << "[pyopencl] event callback handler threw an exception, ignoring: " + << exc.what() + << std::endl; + } + } + + // Need to hold GIL to delete py::object instances in + // event_callback_info_t + delete cb_info; + } + }); + // Thread is away--it is now its responsibility to free cb_info. 
+ cb_info_holder.release(); + + // notif_thread should no longer be coupled to the lifetime of the thread. + notif_thread.detach(); + + try + { + PYOPENCL_CALL_GUARDED(clSetEventCallback, ( + data(), command_exec_callback_type, &event::evt_callback, cb_info)); + } + catch (...) { + // Setting the callback did not succeed. The thread would never + // be woken up. Wake it up to let it know that it can stop. + { + std::lock_guard lg(cb_info->m_mutex); + cb_info->m_set_callback_suceeded = false; + } + cb_info->m_condvar.notify_one(); + throw; + } + } + // }}} +#endif }; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 785427ee..a88dc756 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -111,6 +111,9 @@ void pyopencl_expose_part_1(py::module &m) .def(py::self != py::self) .def("__hash__", &cls::hash) PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_event) +#if PYOPENCL_CL_VERSION >= 0x1010 + .DEF_SIMPLE_METHOD(set_callback) +#endif ; } { diff --git a/test/test_wrapper.py b/test/test_wrapper.py index bba4ca6a..45d04b54 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -793,7 +793,7 @@ def test_event_set_callback(ctx_factory): queue = cl.CommandQueue(ctx) if ctx._get_cl_version() < (1, 1): - pytest.skip("OpenCL 1.1 or newer required fro set_callback") + pytest.skip("OpenCL 1.1 or newer required for set_callback") a_np = np.random.rand(50000).astype(np.float32) b_np = np.random.rand(50000).astype(np.float32) -- GitLab From 1faef5decc1a8d878a594c7fe04bede7497a1d7c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:05:27 -0500 Subject: [PATCH 52/92] [pybind11] Fix a few issues related to the old buffer protocol --- src/wrap_cl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 6c004171..75a571f3 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1368,7 +1368,7 @@ namespace pyopencl cl_int 
m_command_exec_status; event_callback_info_t(py::object py_event, py::object py_callback) - : m_set_callback_suceeded(true), m_py_event(py_event), m_py_callback(py_callback) + : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true) {} }; @@ -3166,8 +3166,8 @@ namespace pyopencl m_ptr = ward->m_buf.buf; m_size = ward->m_buf.len; #else - py::object ward = buffer; - if (PyObject_AsWriteBuffer(buffer.ptr(), &m_ptr, &m_size)) + py::object ward = holder; + if (PyObject_AsWriteBuffer(holder.ptr(), &m_ptr, &m_size)) throw py::error_already_set(); #endif } @@ -3310,7 +3310,7 @@ namespace pyopencl pattern_len = pattern_ward->m_buf.len; #else py::object pattern_ward = py_pattern; - if (PyObject_AsReadBuffer(buffer.ptr(), &pattern_buf, &pattern_len)) + if (PyObject_AsReadBuffer(py_pattern.ptr(), &pattern_ptr, &pattern_len)) throw py::error_already_set(); #endif -- GitLab From ef9a517b462337c508812fe0a69144cf08e195aa Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:11:07 -0500 Subject: [PATCH 53/92] [pybind11] Drop the Py2.6 CI --- .gitlab-ci.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 82b0f349..57ed7d15 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -86,20 +86,6 @@ Python 3.6 AMD GPU: except: - tags -Python 2.6 POCL CPU: - script: - - export PY_EXE=python2.6 - - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako" - - export NO_DOCTESTS=1 - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". 
./build-and-test-py-project.sh" - tags: - - python2.6 - - amd-cl-cpu - except: - - tags - Python 2.7 POCL: script: - export PY_EXE=python2.7 -- GitLab From 90b237cda04c0cd5b885306c4b661aff3e334ec5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:12:16 -0500 Subject: [PATCH 54/92] [pybind11] Stop claiming Python 2.6 support --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 092ac928..d06f1efe 100644 --- a/setup.py +++ b/setup.py @@ -283,7 +283,6 @@ def main(): 'Programming Language :: C++', 'Programming Language :: Python', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', -- GitLab From f14bdfe14c3b92a1d2b6988a4d06a0a1579a3125 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:18:29 -0500 Subject: [PATCH 55/92] [pybind11] Add missing const in svm_memfill --- src/wrap_cl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 75a571f3..a61c6889 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3298,7 +3298,7 @@ namespace pyopencl { PYOPENCL_PARSE_WAIT_FOR; - void *pattern_ptr; + const void *pattern_ptr; PYOPENCL_BUFFER_SIZE_T pattern_len; #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE -- GitLab From 28ae6ab77df2f455d28eda26262f3a84b8fa06ab Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:27:31 -0500 Subject: [PATCH 56/92] [pybind11] Also use custom ext header on Apple --- src/wrap_cl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index a61c6889..76a4af52 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -12,6 +12,7 @@ #ifdef __APPLE__ // Mac ------------------------------------------------------------------------ +#include "pyopencl_ext.h" #include #ifdef HAVE_GL -- GitLab From 
1233da03b2e1d824ce63b404c078617f4c32fc71 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:40:42 -0500 Subject: [PATCH 57/92] Wait longer for the event callback to arrive --- test/test_wrapper.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 45d04b54..69e2dc2d 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -827,9 +827,17 @@ def test_event_set_callback(ctx_factory): queue.finish() + counter = 0 + # yuck - from time import sleep - sleep(0.1) + while not got_called: + from time import sleep + sleep(0.01) + + # wait up to a second + counter += 1 + if counter >= 100: + break assert got_called -- GitLab From f1d95c25f19c350a75aef1bf36d0bb729992e8a8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:43:23 -0500 Subject: [PATCH 58/92] Get rid of support for cl_ext_migrate_memobject --- doc/make_constants.py | 4 ---- doc/runtime_memory.rst | 9 --------- pyopencl/__init__.py | 2 -- src/wrap_cl.hpp | 42 ------------------------------------------ src/wrap_cl_part_1.cpp | 8 -------- src/wrap_constants.cpp | 11 ----------- 6 files changed, 76 deletions(-) diff --git a/doc/make_constants.py b/doc/make_constants.py index c9de4cd8..9ab78ad0 100644 --- a/doc/make_constants.py +++ b/doc/make_constants.py @@ -335,7 +335,6 @@ const_ext_lookup = { "WRITE_BUFFER_RECT": cl_11, "COPY_BUFFER_RECT": cl_11, "USER": cl_11, - "MIGRATE_MEM_OBJECT_EXT": ("cl_ext_migrate_memobject", "2011.2"), "BARRIER": cl_12, "MIGRATE_MEM_OBJECTS": cl_12, "FILL_BUFFER": cl_12, @@ -408,9 +407,6 @@ const_ext_lookup = { "CONTENT_UNDEFINED": cl_12, }, - cl.migrate_mem_object_flags_ext: { - "HOST": ("cl_ext_migrate_memobject", "2011.2"), - }, } try: gl_ci = cl.gl_context_info diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index 75b60253..8431e44a 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -43,15 +43,6 @@ Memory Migration Only 
available with CL 1.2. -.. function:: enqueue_migrate_mem_object_ext(queue, mem_objects, flags=0, wait_for=None) - - :param flags: from :class:`migrate_mem_object_flags_ext` - - .. versionadded:: 2011.2 - - Only available with the `cl_ext_migrate_memobject` - extension. - Buffer ------ diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 874cadac..c99434bd 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -129,8 +129,6 @@ from pyopencl._cl import ( # noqa _enqueue_marker, - enqueue_migrate_mem_object_ext, - _enqueue_read_buffer, _enqueue_write_buffer, _enqueue_copy_buffer, diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 76a4af52..555c4a01 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -12,7 +12,6 @@ #ifdef __APPLE__ // Mac ------------------------------------------------------------------------ -#include "pyopencl_ext.h" #include #ifdef HAVE_GL @@ -1766,47 +1765,6 @@ namespace pyopencl } #endif -#ifdef cl_ext_migrate_memobject - inline - event *enqueue_migrate_mem_object_ext( - command_queue &cq, - py::object py_mem_objects, - cl_mem_migration_flags_ext flags, - py::object py_wait_for) - { - PYOPENCL_PARSE_WAIT_FOR; - -#if PYOPENCL_CL_VERSION >= 0x1020 - // {{{ get platform - cl_device_id dev; - PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (cq.data(), CL_QUEUE_DEVICE, - sizeof(dev), &dev, nullptr)); - cl_platform_id plat; - PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_PLATFORM, - sizeof(plat), &plat, nullptr)); - // }}} -#endif - - PYOPENCL_GET_EXT_FUN(plat, - clEnqueueMigrateMemObjectEXT, enqueue_migrate_fn); - - std::vector mem_objects; - for (py::handle mo: py_mem_objects) - mem_objects.push_back(mo.cast().data()); - - cl_event evt; - PYOPENCL_RETRY_IF_MEM_ERROR( - PYOPENCL_CALL_GUARDED(enqueue_migrate_fn, ( - cq.data(), - mem_objects.size(), mem_objects.empty( ) ? 
nullptr : &mem_objects.front(), - flags, - PYOPENCL_WAITLIST_ARGS, &evt - )); - ); - PYOPENCL_RETURN_NEW_EVENT(evt); - } -#endif - // }}} diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index a88dc756..2927c8c1 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -205,14 +205,6 @@ void pyopencl_expose_part_1(py::module &m) ); #endif -#ifdef cl_ext_migrate_memobject - m.def("enqueue_migrate_mem_object_ext", enqueue_migrate_mem_object_ext, - py::arg("queue"), - py::arg("mem_objects"), - py::arg("flags")=0, - py::arg("wait_for")=py::none() - ); -#endif // }}} // {{{ buffer diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index 0627fe85..cf004bcf 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -56,7 +56,6 @@ namespace class gl_object_type { }; class gl_texture_info { }; - class migrate_mem_object_flags_ext {}; // }}} } @@ -879,9 +878,6 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(COMMAND_, COPY_BUFFER_RECT); ADD_ATTR(COMMAND_, USER); #endif -#ifdef cl_ext_migrate_memobject - ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECT_EXT); -#endif #if PYOPENCL_CL_VERSION >= 0x1020 ADD_ATTR(COMMAND_, BARRIER); ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECTS); @@ -973,13 +969,6 @@ void pyopencl_expose_constants(py::module &m) } #endif - { - py::class_ cls(m, "migrate_mem_object_flags_ext"); -#ifdef cl_ext_migrate_memobject - ADD_ATTR_SUFFIX(MIGRATE_MEM_OBJECT_, HOST, _EXT); -#endif - } - // }}} } -- GitLab From 4835f6d2d7ee0ac97778c26ea5fef67ce141227e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:45:53 -0500 Subject: [PATCH 59/92] Temporarily allow pypy to fail --- .gitlab-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 57ed7d15..6a34b059 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -172,6 +172,10 @@ PyPy POCL: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" + + # FIXME: For now, on the pybind11 branch + allow_failure: true + tags: - pypy - pocl -- GitLab From b3c7188523b183dafda62d9e6e0f81b179fba17d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 00:54:23 -0500 Subject: [PATCH 60/92] [pybind11] Also use custom ext header on Apple--again --- src/wrap_cl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 555c4a01..fa7880bd 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -13,6 +13,7 @@ // Mac ------------------------------------------------------------------------ #include +#include "pyopencl_ext.h" #ifdef HAVE_GL #define PYOPENCL_GL_SHARING_VERSION 1 -- GitLab From 51d698452bad44ad358a1341bc8eb1583f0ab505 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 01:11:33 -0500 Subject: [PATCH 61/92] CI: Move GL and special functions off of mostly non-working AMD CPU build --- .gitlab-ci.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6a34b059..e5a5b44a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,9 +1,8 @@ -"Python 2.7 AMD CPU (+GL and special func)": +"Python 2.7 AMD CPU": script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=amd:pu - - export EXTRA_INSTALL="pybind11 numpy mako scipy pyfmmlib" - - echo "CL_ENABLE_GL = True" > siteconf.py + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" allow_failure: true @@ -139,6 +138,20 @@ Python 3.6 POCL: except: - tags +Python 3.6 POCL (+GL and special functions): + script: + - export PY_EXE=python3.6 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="pybind11 numpy mako scipy pyfmmlib" + - echo "CL_ENABLE_GL = True" > siteconf.py + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh + - ". ./build-and-test-py-project.sh" + tags: + - python3.6 + - pocl + except: + - tags + Python 2.7 Apple: script: - export PY_EXE=python2.7 -- GitLab From 4a32980166e87a07cb7d062d7d7f5aebf97ca956 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 12 Aug 2018 01:38:35 -0500 Subject: [PATCH 62/92] Fix/disable some compiler warnings --- src/wrap_cl.hpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index fa7880bd..b9753134 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1201,8 +1201,15 @@ namespace pyopencl cl_int status_code; PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif m_queue = clCreateCommandQueue( ctx.data(), dev, props, &status_code); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif if (status_code != CL_SUCCESS) throw pyopencl::error("CommandQueue", status_code); @@ -3400,10 +3407,18 @@ namespace pyopencl { cl_int status_code; PYOPENCL_PRINT_CALL_TRACE("clCreateSampler"); + +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif m_sampler = clCreateSampler( ctx.data(), normalized_coordinates, am, fm, &status_code); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif if (status_code != CL_SUCCESS) throw pyopencl::error("Sampler", status_code); @@ 
-3706,7 +3721,6 @@ namespace pyopencl std::vector devices; std::vector binaries; std::vector sizes; - std::vector binary_statuses; size_t num_devices = len(py_devices); if (len(py_binaries) != num_devices) @@ -3737,7 +3751,7 @@ namespace pyopencl sizes.push_back(len); } - binary_statuses.resize(num_devices); + cl_int binary_statuses[num_devices]; cl_int status_code; PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); @@ -3746,7 +3760,7 @@ namespace pyopencl devices.empty( ) ? nullptr : &devices.front(), sizes.empty( ) ? nullptr : &sizes.front(), binaries.empty( ) ? nullptr : &binaries.front(), - binary_statuses.empty( ) ? nullptr : &binary_statuses.front(), + binary_statuses, &status_code); if (status_code != CL_SUCCESS) throw pyopencl::error("clCreateProgramWithBinary", status_code); -- GitLab From 0aff3e1b205a717735fa4fac8201dba85e947501 Mon Sep 17 00:00:00 2001 From: Igor Gnatenko Date: Sun, 12 Aug 2018 15:17:23 +0200 Subject: [PATCH 63/92] setup.py: drop pytest req It is not needed in runtime at all. 
--- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 1c9ca77d..55782409 100644 --- a/setup.py +++ b/setup.py @@ -238,7 +238,6 @@ def main(): install_requires=[ "numpy", "pytools>=2017.6", - "pytest>=2", "decorator>=3.2.0", "cffi>=1.1.0", "appdirs>=1.4.0", -- GitLab From 88367d6477ac8cbc8025ce829a86026988c649fa Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 13 Aug 2018 15:02:06 -0500 Subject: [PATCH 64/92] Add pbyind11 to install_requires --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 9bc6f33c..fcf668bf 100644 --- a/setup.py +++ b/setup.py @@ -319,6 +319,7 @@ def main(): ], setup_requires=[ + "pybind11", "numpy", ], -- GitLab From 1f6e93b2553f018f7cdf5545d9a2bf455480129d Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 13 Aug 2018 15:02:48 -0500 Subject: [PATCH 65/92] Add CL2-style interface to clCreateCommandQueueProperties --- doc/runtime_queue.rst | 15 ++++- src/wrap_cl.hpp | 138 ++++++++++++++++++++++++++++++++++++++--- src/wrap_cl_part_1.cpp | 7 +-- 3 files changed, 146 insertions(+), 14 deletions(-) diff --git a/doc/runtime_queue.rst b/doc/runtime_queue.rst index b4567953..178a0c50 100644 --- a/doc/runtime_queue.rst +++ b/doc/runtime_queue.rst @@ -13,15 +13,26 @@ Command Queue Create a new command queue. *properties* is a bit field consisting of :class:`command_queue_properties` values. - if *device* is None, one of the devices in *context* is chosen + If *device* is None, one of the devices in *context* is chosen in an implementation-defined manner. + *properties* may be a bitwise combination of values from + :class:`queue_properties` (or *None* which is equivalent to + passing *0*). This is compatible with both OpenCL 1.x and 2.x. + + For OpenCL 2.0 and above, *properties* may also be a sequence + of keys and values from :class:`queue_properties` as accepted + by :c:func:`clCreateCommandQueueWithProperties` (see the OpenCL + spec for details). 
The trailing *0* is added automatically + and does not need to be included. + A :class:`CommandQueue` may be used as a context manager, like this:: with cl.CommandQueue(self.cl_context) as queue: enqueue_stuff(queue, ...) - :meth:`finish` is automatically called at the end of the context. + :meth:`finish` is automatically called at the end of the ``with``-delimited + context. .. versionadded:: 2013.1 diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index b9753134..bcac5a38 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -51,6 +51,7 @@ #include #include +#include #include #include #include @@ -1028,6 +1029,47 @@ namespace pyopencl throw error("Context.get_info", CL_INVALID_VALUE); } } + + + // not exposed to python + int get_hex_platform_version() const + { + std::vector devices; + PYOPENCL_GET_VEC_INFO(Context, m_context, CL_CONTEXT_DEVICES, devices); + + if (devices.size() == 0) + throw error("Context._get_hex_version", CL_INVALID_VALUE, + "platform has no devices"); + + cl_platform_id plat; + + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, + (devices[0], CL_DEVICE_PLATFORM, sizeof(plat), &plat, nullptr)); + + std::string plat_version; + { + size_t param_value_size; + PYOPENCL_CALL_GUARDED(clGetPlatformInfo, + (plat, CL_PLATFORM_VERSION, 0, 0, ¶m_value_size)); + + std::vector param_value(param_value_size); + PYOPENCL_CALL_GUARDED(clGetPlatformInfo, + (plat, CL_PLATFORM_VERSION, param_value_size, + param_value.empty( ) ? nullptr : ¶m_value.front(), ¶m_value_size)); + + plat_version = + param_value.empty( ) ? 
"" : std::string(¶m_value.front(), param_value_size-1); + } + + int major_ver, minor_ver; + errno = 0; + int match_count = sscanf(plat_version.c_str(), "OpenCL %d.%d ", &major_ver, &minor_ver); + if (errno || match_count != 2) + throw error("Context._get_hex_version", CL_INVALID_VALUE, + "Platform version string did not have expected format"); + + return major_ver << 12 | minor_ver << 4; + } }; @@ -1183,8 +1225,8 @@ namespace pyopencl command_queue( const context &ctx, - const device *py_dev=0, - cl_command_queue_properties props=0) + const device *py_dev=nullptr, + py::object py_props=py::none()) { cl_device_id dev; if (py_dev) @@ -1199,20 +1241,100 @@ namespace pyopencl dev = devs[0]; } - cl_int status_code; - PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); + int hex_plat_version = ctx.get_hex_platform_version(); + printf("plat version code: %d\n", hex_plat_version); + + bool props_given_as_numeric; + cl_command_queue_properties num_props; + if (py_props.is_none()) + { + num_props = 0; + props_given_as_numeric = true; + } + else + { + try + { + num_props = py::cast(py_props); + props_given_as_numeric = true; + } + catch (py::cast_error &) + { + props_given_as_numeric = false; + } + } + + if (props_given_as_numeric) + { +#if PYOPENCL_CL_VERSION >= 0x2000 + if (hex_plat_version >= 0x2000) + { + cl_queue_properties props_list[] = { CL_QUEUE_PROPERTIES, num_props, 0 }; + + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); + m_queue = clCreateCommandQueueWithProperties( + ctx.data(), dev, props_list, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } + else +#endif + { + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); #if defined(__GNUG__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif - m_queue = clCreateCommandQueue( - ctx.data(), dev, props, &status_code); + m_queue = 
clCreateCommandQueue( + ctx.data(), dev, num_props, &status_code); #if defined(__GNUG__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } + } + else + { +#if PYOPENCL_CL_VERSION >= 0x2000 + throw error("CommandQueue", CL_INVALID_VALUE, + "queue properties given as an iterable, " + "which is only allowed when PyOpenCL was built " + "against an OpenCL 2+ header"); #endif - if (status_code != CL_SUCCESS) - throw pyopencl::error("CommandQueue", status_code); + if (hex_plat_version < 0x2000) + { + std::cerr << + "queue properties given as an iterable, " + "which uses an OpenCL 2+-only interface, " + "but the context's platform does not " + "declare OpenCL 2 support. Proceeding " + "as asked, but the next think you see " + "may be a crash." << std:: endl; + } + + cl_queue_properties props[py::len(py_props) + 1]; + { + size_t i = 0; + for (auto prop: py_props) + props[i++] = py::cast(prop); + props[i++] = 0; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); + m_queue = clCreateCommandQueueWithProperties( + ctx.data(), dev, props, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } } ~command_queue() diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 2927c8c1..6309f98a 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -80,11 +80,10 @@ void pyopencl_expose_part_1(py::module &m) typedef command_queue cls; py::class_>(m, "CommandQueue", py::dynamic_attr()) .def( - py::init(), + py::init(), py::arg("context"), - py::arg("device")=py::none(), - py::arg("properties")=0) + py::arg("device").none(true)=py::none(), + py::arg("properties")=py::cast(0)) .DEF_SIMPLE_METHOD(get_info) #if PYOPENCL_CL_VERSION < 0x1010 .DEF_SIMPLE_METHOD(set_property) -- GitLab From d413aa40fb6573dcfe0918fb122ea5df17b4221e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner 
Date: Mon, 13 Aug 2018 15:21:55 -0500 Subject: [PATCH 66/92] Fix docs of queue-with-properties constructor --- doc/runtime_queue.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/runtime_queue.rst b/doc/runtime_queue.rst index 178a0c50..c0b42897 100644 --- a/doc/runtime_queue.rst +++ b/doc/runtime_queue.rst @@ -38,6 +38,10 @@ Command Queue Context manager capability. + .. versionchanged:: 2018.2 + + Added the sequence-of-properties interface for OpenCL 2. + .. attribute:: info Lower case versions of the :class:`command_queue_info` constants -- GitLab From 7acb005704aa075de706e9cb05f7bdf510e8d6d4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 13 Aug 2018 15:22:46 -0500 Subject: [PATCH 67/92] Various fixes for queue-with-properties constructor --- src/wrap_cl.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index bcac5a38..a444b798 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1242,7 +1242,6 @@ namespace pyopencl } int hex_plat_version = ctx.get_hex_platform_version(); - printf("plat version code: %d\n", hex_plat_version); bool props_given_as_numeric; cl_command_queue_properties num_props; @@ -1306,7 +1305,6 @@ namespace pyopencl "queue properties given as an iterable, " "which is only allowed when PyOpenCL was built " "against an OpenCL 2+ header"); -#endif if (hex_plat_version < 0x2000) { @@ -1315,7 +1313,7 @@ namespace pyopencl "which uses an OpenCL 2+-only interface, " "but the context's platform does not " "declare OpenCL 2 support. Proceeding " - "as asked, but the next think you see " + "as requested, but the next thing you see " "may be a crash." 
<< std:: endl; } @@ -1335,6 +1333,7 @@ namespace pyopencl if (status_code != CL_SUCCESS) throw pyopencl::error("CommandQueue", status_code); } +#endif } ~command_queue() -- GitLab From ff6e119572158beaf852c4eb8f8cbfcfaab99cc1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 13 Aug 2018 15:28:01 -0500 Subject: [PATCH 68/92] Implement sampler creation with properties --- doc/runtime_memory.rst | 27 +++++++++++---- src/wrap_cl.hpp | 76 ++++++++++++++++++++++++++++++++++++++---- src/wrap_cl_part_2.cpp | 3 ++ 3 files changed, 93 insertions(+), 13 deletions(-) diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index 8431e44a..a4ad2d5f 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -352,13 +352,28 @@ Mapping Memory into Host Address Space Samplers -------- -.. class:: Sampler(context, normalized_coords, addressing_mode, filter_mode) +.. class:: Sampler - *normalized_coords* is a :class:`bool` indicating whether - to use coordinates between 0 and 1 (*True*) or the texture's - natural pixel size (*False*). - See :class:`addressing_mode` and :class:`filter_mode` for possible - argument values. + + .. method:: __init__(context, normalized_coords, addressing_mode, filter_mode) + + *normalized_coords* is a :class:`bool` indicating whether + to use coordinates between 0 and 1 (*True*) or the texture's + natural pixel size (*False*). + See :class:`addressing_mode` and :class:`filter_mode` for possible + argument values. + + .. method:: __init__(context, properties) + + :arg properties: a sequence + of keys and values from :class:`sampler_properties` as accepted + by :c:func:`clCreateSamplerWithProperties` (see the OpenCL + spec for details). The trailing *0* is added automatically + and does not need to be included. + + Requires OpenCL 2 or newer. + + .. versionadded:: 2018.2 .. 
attribute:: info diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index a444b798..a55c1806 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3523,26 +3523,88 @@ namespace pyopencl cl_sampler m_sampler; public: +#if PYOPENCL_CL_VERSION >= 0x2000 + sampler(context const &ctx, py::sequence py_props) + { + int hex_plat_version = ctx.get_hex_platform_version(); + + if (hex_plat_version < 0x2000) + { + std::cerr << + "sampler properties given as an iterable, " + "which uses an OpenCL 2+-only interface, " + "but the context's platform does not " + "declare OpenCL 2 support. Proceeding " + "as requested, but the next thing you see " + "may be a crash." << std:: endl; + } + + cl_sampler_properties props[py::len(py_props) + 1]; + { + size_t i = 0; + for (auto prop: py_props) + props[i++] = py::cast(prop); + props[i++] = 0; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); + + m_sampler = clCreateSamplerWithProperties( + ctx.data(), + props, + &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } +#endif + sampler(context const &ctx, bool normalized_coordinates, cl_addressing_mode am, cl_filter_mode fm) { - cl_int status_code; PYOPENCL_PRINT_CALL_TRACE("clCreateSampler"); + int hex_plat_version = ctx.get_hex_platform_version(); +#if PYOPENCL_CL_VERSION >= 0x2000 + if (hex_plat_version >= 0x2000) + { + cl_sampler_properties props_list[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coordinates, + CL_SAMPLER_ADDRESSING_MODE, am, + CL_SAMPLER_FILTER_MODE, fm, + 0, + }; + + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); + m_sampler = clCreateSamplerWithProperties( + ctx.data(), props_list, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } + else +#endif + { + cl_int status_code; + #if defined(__GNUG__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored 
"-Wdeprecated-declarations" #endif - m_sampler = clCreateSampler( - ctx.data(), - normalized_coordinates, - am, fm, &status_code); + m_sampler = clCreateSampler( + ctx.data(), + normalized_coordinates, + am, fm, &status_code); #if defined(__GNUG__) && !defined(__clang__) #pragma GCC diagnostic pop #endif - if (status_code != CL_SUCCESS) - throw pyopencl::error("Sampler", status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } } sampler(cl_sampler samp, bool retain) diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index ac1533e2..2e8e9e4c 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -299,6 +299,9 @@ void pyopencl_expose_part_2(py::module &m) { typedef sampler cls; py::class_(m, "Sampler", py::dynamic_attr()) +#if PYOPENCL_CL_VERSION >= 0x2000 + .def(py::init()) +#endif .def(py::init()) .DEF_SIMPLE_METHOD(get_info) .def(py::self == py::self) -- GitLab From 8cf0d7764f0508f08110b060abc04815de17ae43 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 13 Aug 2018 16:13:13 -0500 Subject: [PATCH 69/92] Fix ifdef/block nesting in command queue constructor --- src/wrap_cl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index a55c1806..8d8a8d18 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1332,8 +1332,8 @@ namespace pyopencl if (status_code != CL_SUCCESS) throw pyopencl::error("CommandQueue", status_code); - } #endif + } } ~command_queue() -- GitLab From 88eecd2f65c4f72e995956986a190d80aa0827bd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 14 Aug 2018 12:01:57 -0500 Subject: [PATCH 70/92] Add back retain flag argument to from_int_ptr --- pyopencl/__init__.py | 4 ++-- src/wrap_cl.hpp | 16 ++++++++-------- src/wrap_cl_part_1.cpp | 16 +++++++++++----- src/wrap_helpers.hpp | 13 +++++++++---- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 
c99434bd..4a81e306 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -356,8 +356,8 @@ class Program(object): return self._get_prg().int_ptr int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) - def from_int_ptr(int_ptr_value): - return Program(_cl._Program.from_int_ptr(int_ptr_value)) + def from_int_ptr(int_ptr_value, retain=true): + return Program(_cl._Program.from_int_ptr(int_ptr_value), retain) from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ from_int_ptr = staticmethod(from_int_ptr) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 8d8a8d18..ba6a7544 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1775,7 +1775,7 @@ namespace pyopencl // {{{ memory_object - py::object create_mem_object_wrapper(cl_mem mem); + py::object create_mem_object_wrapper(cl_mem mem, bool retain); class memory_object_holder { @@ -2480,7 +2480,7 @@ namespace pyopencl return py::none(); } - return create_mem_object_wrapper(param_value); + return create_mem_object_wrapper(param_value, /* retain */ true); } case CL_IMAGE_NUM_MIP_LEVELS: @@ -4688,7 +4688,7 @@ namespace pyopencl // {{{ deferred implementation bits - inline py::object create_mem_object_wrapper(cl_mem mem) + inline py::object create_mem_object_wrapper(cl_mem mem, bool retain=true) { cl_mem_object_type mem_obj_type; PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ @@ -4698,7 +4698,7 @@ namespace pyopencl { case CL_MEM_OBJECT_BUFFER: return py::object(handle_from_new_ptr( - new buffer(mem, /*retain*/ true))); + new buffer(mem, retain))); case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE3D: #if PYOPENCL_CL_VERSION >= 0x1020 @@ -4708,17 +4708,17 @@ namespace pyopencl case CL_MEM_OBJECT_IMAGE1D_BUFFER: #endif return py::object(handle_from_new_ptr( - new image(mem, /*retain*/ true))); + new image(mem, retain))); default: return py::object(handle_from_new_ptr( - new memory_object(mem, /*retain*/ true))); + new memory_object(mem, retain))); } } inline - py::object memory_object_from_int(intptr_t 
cl_mem_as_int) + py::object memory_object_from_int(intptr_t cl_mem_as_int, bool retain) { - return create_mem_object_wrapper((cl_mem) cl_mem_as_int); + return create_mem_object_wrapper((cl_mem) cl_mem_as_int, retain); } diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index 6309f98a..e102a560 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -187,11 +187,17 @@ void pyopencl_expose_part_1(py::module &m) .def_property_readonly("hostbuf", &cls::hostbuf) .def_static("from_int_ptr", memory_object_from_int, - "(static method) Return a new Python object referencing the C-level " \ - ":c:type:`cl_mem` object at the location pointed to " \ - "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ - "will be called." \ - "\n\n.. versionadded:: 2013.2\n") \ + "(static method) Return a new Python object referencing the C-level " + ":c:type:`cl_mem` object at the location pointed to " + "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " + "will be called if *retain* is True." + "If the previous owner of the object will *not* release the reference, " + "*retain* should be set to *False*, to effectively transfer ownership to " + ":mod:`pyopencl`." + "\n\n.. versionadded:: 2013.2\n", + "\n\n.. 
versionchanged:: 2016.1\n\n *retain* added.", + py::arg("int_ptr_value"), + py::arg("retain")=true) ; } diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp index d9b2389d..4a2d1ee9 100644 --- a/src/wrap_helpers.hpp +++ b/src/wrap_helpers.hpp @@ -128,10 +128,10 @@ namespace } template - inline T *from_int_ptr(intptr_t obj_ref) + inline T *from_int_ptr(intptr_t obj_ref, bool retain) { ClType clobj = (ClType) obj_ref; - return new T(clobj, /* retain */ true); + return new T(clobj, retain); } template @@ -144,11 +144,16 @@ namespace #define PYOPENCL_EXPOSE_TO_FROM_INT_PTR(CL_TYPENAME) \ .def_static("from_int_ptr", from_int_ptr, \ py::arg("int_ptr_value"), \ + py::arg("retain")=true, \ "(static method) Return a new Python object referencing the C-level " \ ":c:type:`" #CL_TYPENAME "` object at the location pointed to " \ "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ - "will be called." \ - "\n\n.. versionadded:: 2013.2\n") \ + "will be called if *retain* is True." \ + "If the previous owner of the object will *not* release the reference, " \ + "*retain* should be set to *False*, to effectively transfer ownership to " \ + ":mod:`pyopencl`." \ + "\n\n.. versionadded:: 2013.2\n" \ + "\n\n.. versionchanged:: 2016.1\n\n *retain* added.") \ .def_property_readonly("int_ptr", to_int_ptr, \ "Return an integer corresponding to the pointer value " \ "of the underlying :c:type:`" #CL_TYPENAME "`. 
" \ -- GitLab From 5405d7ba8ebe6d78e186ca9031707d5bb45d7594 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 14 Aug 2018 12:25:59 -0500 Subject: [PATCH 71/92] Fix py typo true -> True --- pyopencl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 4a81e306..9d8ebea5 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -356,7 +356,7 @@ class Program(object): return self._get_prg().int_ptr int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) - def from_int_ptr(int_ptr_value, retain=true): + def from_int_ptr(int_ptr_value, retain=True): return Program(_cl._Program.from_int_ptr(int_ptr_value), retain) from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ from_int_ptr = staticmethod(from_int_ptr) -- GitLab From 5ebbee62c9f073536e224288676a13bbfef8b16c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 14 Aug 2018 12:57:14 -0500 Subject: [PATCH 72/92] Fix Program.from_int_ptr --- pyopencl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 9d8ebea5..2e492585 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -357,7 +357,7 @@ class Program(object): int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) def from_int_ptr(int_ptr_value, retain=True): - return Program(_cl._Program.from_int_ptr(int_ptr_value), retain) + return Program(_cl._Program.from_int_ptr(int_ptr_value, retain)) from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ from_int_ptr = staticmethod(from_int_ptr) -- GitLab From 2ee1ca11167686b9a80c812ee1a71c9030628fbb Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 14 Aug 2018 13:17:10 -0500 Subject: [PATCH 73/92] Fix premature deletion of lock in Event.set_callback --- src/wrap_cl.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index ba6a7544..1314a156 100644 --- 
a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1526,8 +1526,12 @@ namespace pyopencl std::thread notif_thread([cb_info]() { - std::unique_lock ulk(cb_info->m_mutex); - cb_info->m_condvar.wait(ulk); + { + std::unique_lock ulk(cb_info->m_mutex); + cb_info->m_condvar.wait(ulk); + + // ulk no longer held here, cb_info ready for deletion + } { py::gil_scoped_acquire acquire; -- GitLab From c0188b58766d954a7cb159fa6d8831b98bc7c8bd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 14 Aug 2018 13:44:13 -0500 Subject: [PATCH 74/92] Fix bounds to be inclusive in test_random_float_in_range --- test/test_array.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_array.py b/test/test_array.py index 5f54cfa3..3e74bcf0 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -581,7 +581,7 @@ def test_bitwise(ctx_factory): @pytest.mark.parametrize("rng_class", [RanluxGenerator, PhiloxGenerator, ThreefryGenerator]) -@pytest.mark.parametrize("ary_size", [300, 301, 302, 303, 10007]) +@pytest.mark.parametrize("ary_size", [300, 301, 302, 303, 10007, 1000000]) def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False): context = ctx_factory() queue = cl.CommandQueue(context) @@ -606,16 +606,22 @@ def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False pt.hist(ran.get(), 30) pt.show() - assert (0 < ran.get()).all() - assert (ran.get() < 1).all() + assert (0 <= ran.get()).all() + assert (ran.get() <= 1).all() if rng_class is RanluxGenerator: gen.synchronize(queue) ran = cl_array.zeros(queue, ary_size, dtype) gen.fill_uniform(ran, a=4, b=7) - assert (4 < ran.get()).all() - assert (ran.get() < 7).all() + ran_host = ran.get() + + for cond in [4 <= ran_host, ran_host <= 7]: + good = cond.all() + if not good: + print(np.where(~cond)) + print(ran_host[~cond]) + assert good ran = gen.normal(queue, ary_size, dtype, mu=10, sigma=3) -- GitLab From fe95f4a1b91c1a08d92d82dfac046ba4533a84b7 Mon Sep 17 
00:00:00 2001 From: Andreas Kloeckner Date: Thu, 16 Aug 2018 11:55:24 -0500 Subject: [PATCH 75/92] Switch from std::r{begin,end} to container.r{begin,end} for older compilers --- src/tools.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools.hpp b/src/tools.hpp index 30f4b08f..935dab7a 100644 --- a/src/tools.hpp +++ b/src/tools.hpp @@ -37,10 +37,10 @@ namespace pyopencl struct reversion_wrapper { T& iterable; }; template - auto begin (reversion_wrapper w) { return std::rbegin(w.iterable); } + auto begin (reversion_wrapper w) { return w.iterable.rbegin(); } template - auto end (reversion_wrapper w) { return std::rend(w.iterable); } + auto end (reversion_wrapper w) { return w.iterable.rend(); } template reversion_wrapper reverse (T&& iterable) { return { iterable }; } -- GitLab From 1a2cd280839191442a436e8bee7f941241f2b7ca Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 12:39:56 -0500 Subject: [PATCH 76/92] Wrap --Wignored-attributes ignorer around std::vector constructor --- src/wrap_cl.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 1314a156..a20f2f0f 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -745,7 +745,16 @@ namespace pyopencl } case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: { +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +// what's being ignored here is an alignment attribute to native size, which +// shouldn't matter on the relevant ABIs that I'm aware of. 
+#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif std::vector result; +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result); } -- GitLab From e0e7fd48b80e83be3bcf2ff15b8950270e7c12d7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 12:41:10 -0500 Subject: [PATCH 77/92] Fix build error handling --- pyopencl/__init__.py | 24 +++++++++--------------- src/wrap_cl.hpp | 8 ++++---- src/wrap_constants.cpp | 21 +++++++++++++++------ 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 2e492585..793b62d6 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -46,8 +46,6 @@ except ImportError: import numpy as np -from pytools import Record - import sys _PYPY = '__pypy__' in sys.builtin_module_names @@ -215,10 +213,6 @@ CONSTANT_CLASSES = tuple( # {{{ diagnostics -class _ErrorRecord(Record): - pass - - class CompilerWarning(UserWarning): pass @@ -524,7 +518,7 @@ class Program(object): try: return build_func() except _cl.RuntimeError as e: - msg = e.what + msg = str(e) if options_bytes: msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8") @@ -542,7 +536,7 @@ class Program(object): routine = e.routine err = _cl.RuntimeError( - _cl.Error._ErrorRecord( + _cl._ErrorRecord( msg=msg, code=code, routine=routine)) @@ -688,17 +682,17 @@ def _add_functionality(): try: self._build(options=options_bytes, devices=devices) except Error as e: - what = e.what + "\n\n" + (75*"="+"\n").join( + msg = str(e) + "\n\n" + (75*"="+"\n").join( "Build on %s:\n\n%s" % (dev, log) for dev, log in self._get_build_logs()) code = e.code routine = e.routine err = _cl.RuntimeError( - _ErrorRecord( - what=lambda: what, - code=lambda: code, - routine=lambda: routine)) + _cl._ErrorRecord( + msg=msg, + code=code, + routine=routine)) if err is not None: # 
Python 3.2 outputs the whole list of currently active exceptions @@ -1000,7 +994,7 @@ def _add_functionality(): # {{{ Error def error_str(self): - val = self.args[0] + val = self.what try: val.routine except AttributeError: @@ -1027,7 +1021,7 @@ def _add_functionality(): return self.args[0].routine() def error_what(self): - return self.args[0].what() + return self.args[0] Error.__str__ = error_str Error.code = property(error_code) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index a20f2f0f..b2d5877e 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -398,15 +398,15 @@ namespace pyopencl class error : public std::runtime_error { private: - const char *m_routine; + std::string m_routine; cl_int m_code; public: - error(const char *rout, cl_int c, const char *msg="") - : std::runtime_error(msg), m_routine(rout), m_code(c) + error(const char *routine, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c) { } - const char *routine() const + const std::string &routine() const { return m_routine; } diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index cf004bcf..e585e117 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -95,21 +95,30 @@ void pyopencl_expose_constants(py::module &m) } // }}} - // {{{ constants -#define ADD_ATTR(PREFIX, NAME) \ - cls.attr(#NAME) = CL_##PREFIX##NAME -#define ADD_ATTR_SUFFIX(PREFIX, NAME, SUFFIX) \ - cls.attr(#NAME) = CL_##PREFIX##NAME##SUFFIX + // {{{ error record { typedef error cls; - py::class_ (m, "_error") + py::class_ (m, "_ErrorRecord") + .def(py::init(), + py::arg("routine"), + py::arg("code"), + py::arg("msg")) .DEF_SIMPLE_METHOD(routine) .DEF_SIMPLE_METHOD(code) .DEF_SIMPLE_METHOD(what) + .DEF_SIMPLE_METHOD(is_out_of_memory) ; } + // }}} + + // {{{ constants +#define ADD_ATTR(PREFIX, NAME) \ + cls.attr(#NAME) = CL_##PREFIX##NAME +#define ADD_ATTR_SUFFIX(PREFIX, NAME, SUFFIX) \ + cls.attr(#NAME) = CL_##PREFIX##NAME##SUFFIX + { py::class_ cls(m, "status_code"); -- 
GitLab From 0ecf6f8caf2016b03a03114c058d22ef1e09cb0a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 13:03:20 -0500 Subject: [PATCH 78/92] Bump timeout on Event.set_callback test --- test/test_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 69e2dc2d..94a00d85 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -834,9 +834,9 @@ def test_event_set_callback(ctx_factory): from time import sleep sleep(0.01) - # wait up to a second + # wait up to five seconds (?!) counter += 1 - if counter >= 100: + if counter >= 500: break assert got_called -- GitLab From a5d85bc7d1b3fc36c7a9c589be069606969a69ce Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 15:04:56 -0500 Subject: [PATCH 79/92] Docs fixes/add missing pieces --- doc/index.rst | 1 + doc/misc.rst | 2 ++ pyopencl/__init__.py | 13 +++++++------ src/wrap_cl_part_1.cpp | 2 +- src/wrap_cl_part_2.cpp | 4 +++- src/wrap_constants.cpp | 1 + 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/doc/index.rst b/doc/index.rst index f771ab25..d715d1a2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -108,6 +108,7 @@ Contents runtime_program runtime_gl array + types algorithm howto tools diff --git a/doc/misc.rst b/doc/misc.rst index 7459f89f..b4e30db7 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -121,6 +121,8 @@ checking `this file Note that the triple-quoted strings containing the source must start with `"""//CL// ..."""`. +.. _ipython-integration: + IPython integration ------------------- diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 793b62d6..042cb9da 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1038,6 +1038,13 @@ def _add_functionality(): def memory_map_exit(self, exc_type, exc_val, exc_tb): self.release() + MemoryMap.__doc__ = """ + This class may also be used as a context manager in a ``with`` statement. 
+ The memory corresponding to this object will be unmapped when + this object is deleted or :meth:`release` is called. + + .. automethod:: release + """ MemoryMap.__enter__ = memory_map_enter MemoryMap.__exit__ = memory_map_exit @@ -1082,12 +1089,6 @@ def _add_functionality(): if get_cl_header_version() >= (2, 0): SVMAllocation.__init__ = svmallocation_init - # FIXME - # SVMAllocation.enqueue_release.__doc__ = """ - # :returns: a :class:`pyopencl.Event` - - # |std-enqueue-blurb| - # """ # }}} diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index e102a560..cb44c6b9 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -194,7 +194,7 @@ void pyopencl_expose_part_1(py::module &m) "If the previous owner of the object will *not* release the reference, " "*retain* should be set to *False*, to effectively transfer ownership to " ":mod:`pyopencl`." - "\n\n.. versionadded:: 2013.2\n", + "\n\n.. versionadded:: 2013.2\n" "\n\n.. versionchanged:: 2016.1\n\n *retain* added.", py::arg("int_ptr_value"), py::arg("retain")=true) diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp index 2e8e9e4c..0467ec18 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -245,7 +245,9 @@ void pyopencl_expose_part_2(py::module &m) py::class_(m, "SVMAllocation", py::dynamic_attr()) .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) .DEF_SIMPLE_METHOD(release) - .DEF_SIMPLE_METHOD(enqueue_release) + .def("enqueue_release", &cls::enqueue_release, + ":returns: a :class:`pyopencl.Event`\n\n" + "|std-enqueue-blurb|") .def("_ptr_as_int", &cls::ptr_as_int) .def(py::self == py::self) .def(py::self != py::self) diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp index e585e117..7b6a97f1 100644 --- a/src/wrap_constants.cpp +++ b/src/wrap_constants.cpp @@ -797,6 +797,7 @@ void pyopencl_expose_constants(py::module &m) ADD_ATTR(KERNEL_ARG_, ADDRESS_QUALIFIER); ADD_ATTR(KERNEL_ARG_, ACCESS_QUALIFIER); ADD_ATTR(KERNEL_ARG_, TYPE_NAME); + 
ADD_ATTR(KERNEL_ARG_, TYPE_QUALIFIER); ADD_ATTR(KERNEL_ARG_, NAME); #endif } -- GitLab From 81bfec900233b19383933401faac9e887a9796b8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 15:41:01 -0500 Subject: [PATCH 80/92] Filter spurious cond var wakeups, instrument thread and callback in set_callback with logging --- src/wrap_cl.hpp | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index b2d5877e..f5ab376c 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1502,23 +1502,37 @@ namespace pyopencl bool m_set_callback_suceeded; + bool m_notify_thread_wakeup_is_genuine; + cl_event m_event; cl_int m_command_exec_status; event_callback_info_t(py::object py_event, py::object py_callback) - : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true) + : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true), + m_notify_thread_wakeup_is_genuine(false) {} }; static void evt_callback(cl_event evt, cl_int command_exec_status, void *user_data) { + // FIXME REMOVE + puts("event callback: started"); + event_callback_info_t *cb_info = reinterpret_cast(user_data); { std::lock_guard lg(cb_info->m_mutex); cb_info->m_event = evt; cb_info->m_command_exec_status = command_exec_status; + cb_info->m_notify_thread_wakeup_is_genuine = true; } + // FIXME REMOVE + puts("event callback: before cv notify"); + cb_info->m_condvar.notify_one(); + + // FIXME REMOVE + puts("event callback: done"); + } public: @@ -1535,9 +1549,16 @@ namespace pyopencl std::thread notif_thread([cb_info]() { + // FIXME REMOVE + puts("thread: started"); { std::unique_lock ulk(cb_info->m_mutex); - cb_info->m_condvar.wait(ulk); + cb_info->m_condvar.wait( + ulk, + [&](){ return cb_info->m_notify_thread_wakeup_is_genuine; }); + + // FIXME REMOVE + puts("thread: wait returned"); // ulk no longer held here, cb_info ready for deletion } @@ -1545,6 +1566,9 @@ namespace 
pyopencl { py::gil_scoped_acquire acquire; + // FIXME REMOVE + puts("thread: gil acquired"); + if (cb_info->m_set_callback_suceeded) { try { @@ -1561,9 +1585,15 @@ namespace pyopencl } } + // FIXME REMOVE + puts("thread: before delete"); + // Need to hold GIL to delete py::object instances in // event_callback_info_t delete cb_info; + // + // FIXME REMOVE + puts("thread: ending"); } }); // Thread is away--it is now its responsibility to free cb_info. @@ -1583,6 +1613,7 @@ namespace pyopencl { std::lock_guard lg(cb_info->m_mutex); cb_info->m_set_callback_suceeded = false; + cb_info->m_notify_thread_wakeup_is_genuine = true; } cb_info->m_condvar.notify_one(); throw; -- GitLab From 304fedf61dbd8a4ddd114bd11ad465250d1e7452 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 17 Aug 2018 16:44:14 -0500 Subject: [PATCH 81/92] Remove set_event instrumentation --- src/wrap_cl.hpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index f5ab376c..ae5d3dc2 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -1515,9 +1515,6 @@ namespace pyopencl static void evt_callback(cl_event evt, cl_int command_exec_status, void *user_data) { - // FIXME REMOVE - puts("event callback: started"); - event_callback_info_t *cb_info = reinterpret_cast(user_data); { std::lock_guard lg(cb_info->m_mutex); @@ -1525,14 +1522,8 @@ namespace pyopencl cb_info->m_command_exec_status = command_exec_status; cb_info->m_notify_thread_wakeup_is_genuine = true; } - // FIXME REMOVE - puts("event callback: before cv notify"); cb_info->m_condvar.notify_one(); - - // FIXME REMOVE - puts("event callback: done"); - } public: @@ -1549,26 +1540,18 @@ namespace pyopencl std::thread notif_thread([cb_info]() { - // FIXME REMOVE - puts("thread: started"); { std::unique_lock ulk(cb_info->m_mutex); cb_info->m_condvar.wait( ulk, [&](){ return cb_info->m_notify_thread_wakeup_is_genuine; }); - // FIXME REMOVE - puts("thread: wait returned"); - // ulk no longer 
held here, cb_info ready for deletion } { py::gil_scoped_acquire acquire; - // FIXME REMOVE - puts("thread: gil acquired"); - if (cb_info->m_set_callback_suceeded) { try { @@ -1585,15 +1568,9 @@ namespace pyopencl } } - // FIXME REMOVE - puts("thread: before delete"); - // Need to hold GIL to delete py::object instances in // event_callback_info_t delete cb_info; - // - // FIXME REMOVE - puts("thread: ending"); } }); // Thread is away--it is now its responsibility to free cb_info. -- GitLab From d405ad31a6e737dfe22dc120bf2a2c8ee4c5b8c1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 19 Aug 2018 15:23:16 -0500 Subject: [PATCH 82/92] Reenable compilation in Pypy --- .gitlab-ci.yml | 4 ---- src/numpy_init.hpp | 4 ++++ src/wrap_cl.hpp | 9 +++++++++ src/wrap_cl_part_1.cpp | 3 +++ src/wrap_cl_part_2.cpp | 3 +++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e5a5b44a..6fe9ba49 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -185,10 +185,6 @@ PyPy POCL: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" - - # FIXME: For now, on the pybind11 branch - allow_failure: true - tags: - pypy - pocl diff --git a/src/numpy_init.hpp b/src/numpy_init.hpp index 2b54a2a5..9d6393f4 100644 --- a/src/numpy_init.hpp +++ b/src/numpy_init.hpp @@ -13,7 +13,11 @@ namespace { static bool do_import_array() { +#ifdef PYPY_VERSION + import_array(); +#else import_array1(false); +#endif return true; } diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index ae5d3dc2..81ff64a3 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3103,6 +3103,8 @@ namespace pyopencl + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION inline py::object enqueue_map_buffer( std::shared_ptr cq, @@ -3177,10 +3179,13 @@ namespace pyopencl result, handle_from_new_ptr(new event(evt_handle))); } +#endif + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION inline py::object enqueue_map_image( std::shared_ptr cq, @@ -3249,6 +3254,7 @@ namespace pyopencl handle_from_new_ptr(new event(evt_handle)), row_pitch, slice_pitch); } +#endif // }}} @@ -4794,6 +4800,8 @@ namespace pyopencl } } + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION inline py::object get_mem_obj_host_array( py::object mem_obj_py, @@ -4859,6 +4867,7 @@ namespace pyopencl return result; } +#endif // }}} } diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp index cb44c6b9..c3e6d5e8 100644 --- a/src/wrap_cl_part_1.cpp +++ b/src/wrap_cl_part_1.cpp @@ -165,10 +165,13 @@ void pyopencl_expose_part_1(py::module &m) typedef memory_object_holder cls; py::class_(m, "MemoryObjectHolder", py::dynamic_attr()) .DEF_SIMPLE_METHOD(get_info) + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION .def("get_host_array", get_mem_obj_host_array, py::arg("shape"), py::arg("dtype"), py::arg("order")="C") +#endif .def("__eq__", [](const cls &self, const cls &other){ return self == other; }) .def("__ne__", [](const cls &self, const cls &other){ return self != other; }) .def("__hash__", &cls::hash) diff --git a/src/wrap_cl_part_2.cpp 
b/src/wrap_cl_part_2.cpp index 0467ec18..7b4c2c9e 100644 --- a/src/wrap_cl_part_2.cpp +++ b/src/wrap_cl_part_2.cpp @@ -204,6 +204,8 @@ void pyopencl_expose_part_2(py::module &m) ; } + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION m.def("enqueue_map_buffer", enqueue_map_buffer, py::arg("queue"), py::arg("buf"), @@ -227,6 +229,7 @@ void pyopencl_expose_part_2(py::module &m) py::arg("strides")=py::none(), py::arg("wait_for")=py::none(), py::arg("is_blocking")=true); +#endif // }}} -- GitLab From 0ef5b3c10e5227975aad5ff9036b81616435003c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 19 Aug 2018 16:44:53 -0500 Subject: [PATCH 83/92] [pybind] pypy: don't try to import still-missing functions on pypy --- pyopencl/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 042cb9da..b91daedf 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -134,11 +134,9 @@ from pyopencl._cl import ( # noqa _enqueue_write_buffer_rect, _enqueue_copy_buffer_rect, - enqueue_map_buffer, _enqueue_read_image, _enqueue_copy_image, _enqueue_write_image, - enqueue_map_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, @@ -152,6 +150,13 @@ from pyopencl._cl import ( # noqa DeviceTopologyAmd, ) +if not _PYPY: + # FIXME: Add back to default set when pypy support catches up + from pyopencl._cl import ( # noqa + enqueue_map_buffer, + enqueue_map_image, + ) + if get_cl_header_version() >= (1, 1): from pyopencl._cl import ( # noqa UserEvent, -- GitLab From d4f5bb451a4ef6490b9a14fb963506c4cb7da530 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 19 Aug 2018 16:45:15 -0500 Subject: [PATCH 84/92] [pybind11] Use patch pybind11 on pypy --- .gitlab-ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6fe9ba49..61dd91a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -181,7 +181,10 @@ PyPy POCL: script: - export 
PY_EXE=pypy - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="pybind11 numpy mako" + + # https://github.com/pybind/pybind11/pull/1494 + - export EXTRA_INSTALL="git+https://github.com/inducer/pybind11 numpy mako" + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" -- GitLab From 138658c7dffc7004043905eba8117cc987b571ad Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 20 Aug 2018 11:04:11 -0500 Subject: [PATCH 85/92] [pybind] Re-allow failure on the pypy tests --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 61dd91a0..5ca50bd7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -188,6 +188,8 @@ PyPy POCL: - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" + + allow_failure: true tags: - pypy - pocl -- GitLab From c50765f34481b396d88b192ecd2e80e58f3bec32 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 20 Aug 2018 14:52:34 -0500 Subject: [PATCH 86/92] [pybind] Pypy compat tweaks --- pyopencl/array.py | 3 ++- src/wrap_cl.hpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyopencl/array.py b/pyopencl/array.py index 704c495b..a4a5f4cf 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -663,7 +663,8 @@ class Array(object): if ary is None: ary = np.empty(self.shape, self.dtype) - ary = _as_strided(ary, strides=self.strides) + if self.strides != ary.strides: + ary = _as_strided(ary, strides=self.strides) else: if ary.size != self.size: raise TypeError("'ary' has non-matching size") diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 81ff64a3..b4d16831 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -82,7 +82,7 @@ #endif -#if PY_VERSION_HEX >= 0x03000000 +#if (PY_VERSION_HEX >= 0x03000000) or defined(PYPY_VERSION) #define 
PYOPENCL_USE_NEW_BUFFER_INTERFACE #define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) std::move(s) #else -- GitLab From 179265258997da6f3a5aeb72001f56540dccdc14 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 20 Aug 2018 15:29:19 -0500 Subject: [PATCH 87/92] [pybind] Add pypy workaround for enqueue_fill_buffer --- pyopencl/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index b91daedf..d0904c29 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1831,6 +1831,10 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): from warnings import warn warn("The context for this queue does not declare OpenCL 1.2 support, so " "the next thing you might see is a crash") + + if _PYPY and isinstance(pattern, np.generic): + pattern = np.asarray(pattern) + return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for) # }}} -- GitLab From 671f84c3b7009b0a610e743ca64ca78420475243 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 20 Aug 2018 18:10:26 -0500 Subject: [PATCH 88/92] [pybind] Handle sized numpy scalar kernel arguments in pypy --- pyopencl/invoker.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py index 7fad942c..b580c537 100644 --- a/pyopencl/invoker.py +++ b/pyopencl/invoker.py @@ -59,11 +59,14 @@ del _size_t_char def generate_buffer_arg_setter(gen, arg_idx, buf_var): from pytools.py_codegen import Indentation - if _CPY2: + if _CPY2 or _PYPY: # https://github.com/numpy/numpy/issues/5381 gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) with Indentation(gen): - gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) + if _PYPY: + gen("{buf_var} = np.asarray({buf_var})".format(buf_var=buf_var)) + else: + gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) gen(""" self._set_arg_buf({arg_idx}, {buf_var}) @@ -349,7 +352,7 @@ def 
_generate_enqueue_and_set_args_module(function_name, invoker_cache = WriteOncePersistentDict( - "pyopencl-invoker-cache-v4", + "pyopencl-invoker-cache-v6", key_builder=_NumpyTypesKeyBuilder()) -- GitLab From 9c3f1076bc6410dc9bdcdfe2e62a63fdb2c5117a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Mon, 20 Aug 2018 18:12:19 -0500 Subject: [PATCH 89/92] [pybind] xfail test_map_dtype on pypy --- test/test_wrapper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 94a00d85..34b4207b 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -1016,6 +1016,12 @@ def test_fine_grain_svm(ctx_factory): cl.cltypes.uint2, ]) def test_map_dtype(ctx_factory, dtype): + from pyopencl import _PYPY + + if _PYPY: + # FIXME + pytest.xfail("enqueue_map_buffer not yet working on pypy") + ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab From 290c00a63ef0aec698501de59c9c7b0cecf636da Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 21 Aug 2018 01:18:36 -0500 Subject: [PATCH 90/92] [pybind] Work around broken any-contiguity detection in pypy --- src/wrap_cl.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index b4d16831..9111fd12 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -445,8 +445,22 @@ namespace pyopencl void get(PyObject *obj, int flags) { +#ifdef PYPY_VERSION + // work around https://bitbucket.org/pypy/pypy/issues/2873 + if (flags & PyBUF_ANY_CONTIGUOUS) + { + int flags_wo_cont = flags & ~PyBUF_ANY_CONTIGUOUS; + if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_C_CONTIGUOUS)) + { + PyErr_Clear(); + if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_F_CONTIGUOUS)) + throw py::error_already_set(); + } + } +#else if (PyObject_GetBuffer(obj, &m_buf, flags)) throw py::error_already_set(); +#endif m_initialized = true; } -- GitLab From 7712375b6d3300a20281366a4a327e8b167c0696 Mon Sep 17 00:00:00 2001 From: Andreas 
Kloeckner Date: Tue, 21 Aug 2018 01:56:51 -0500 Subject: [PATCH 91/92] [pybind] More pypy fixes --- .gitlab-ci.yml | 1 - src/wrap_cl.hpp | 7 +++++++ test/test_wrapper.py | 15 ++++++++++----- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5ca50bd7..789cef1c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -189,7 +189,6 @@ PyPy POCL: - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" - allow_failure: true tags: - pypy - pocl diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index 9111fd12..a929d229 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -3291,7 +3291,14 @@ namespace pyopencl { #ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE ward = std::unique_ptr(new py_buffer_wrapper); +#ifdef PYPY_VERSION + // FIXME: get a read-only buffer + // Not quite honest, but Pypy doesn't consider numpy arrays + // created from objects with the __aray_interface__ writeable. 
+ ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS); +#else ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); +#endif m_ptr = ward->m_buf.buf; m_size = ward->m_buf.len; #else diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 34b4207b..4d729642 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -295,7 +295,9 @@ def test_image_format_constructor(): assert iform.channel_order == cl.channel_order.RGBA assert iform.channel_data_type == cl.channel_type.FLOAT - assert not hasattr(iform, "__dict__") + + if not cl._PYPY: + assert not hasattr(iform, "__dict__") def test_device_topology_amd_constructor(): @@ -306,7 +308,8 @@ def test_device_topology_amd_constructor(): assert topol.device == 4 assert topol.function == 5 - assert not hasattr(topol, "__dict__") + if not cl._PYPY: + assert not hasattr(topol, "__dict__") def test_nonempty_supported_image_formats(ctx_factory): @@ -738,6 +741,10 @@ def test_user_event(ctx_factory): def test_buffer_get_host_array(ctx_factory): + if cl._PYPY: + # FIXME + pytest.xfail("Buffer.get_host_array not yet working on pypy") + ctx = ctx_factory() mf = cl.mem_flags @@ -1016,9 +1023,7 @@ def test_fine_grain_svm(ctx_factory): cl.cltypes.uint2, ]) def test_map_dtype(ctx_factory, dtype): - from pyopencl import _PYPY - - if _PYPY: + if cl._PYPY: # FIXME pytest.xfail("enqueue_map_buffer not yet working on pypy") -- GitLab From ab44228a5e38330d19a9ba8973b586a65a5aa397 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 21 Aug 2018 11:20:20 -0500 Subject: [PATCH 92/92] [pybind] Fix pypy any-contiguity workaround --- src/wrap_cl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp index a929d229..ace4bd25 100644 --- a/src/wrap_cl.hpp +++ b/src/wrap_cl.hpp @@ -457,10 +457,10 @@ namespace pyopencl throw py::error_already_set(); } } -#else + else +#endif if (PyObject_GetBuffer(obj, &m_buf, flags)) throw py::error_already_set(); -#endif m_initialized = true; } 
-- GitLab