From d4560fa0b867a14b4739e90ab29bd88be56151b8 Mon Sep 17 00:00:00 2001 From: Marko Bencun <mbencun@gmail.com> Date: Fri, 30 Aug 2013 19:24:11 +0200 Subject: [PATCH] cffi experiments --- pyopencl/__init__.py | 65 +- pyopencl/cffi_cl.py | 493 ++++++++ src/c_wrapper/Makefile | 5 + src/c_wrapper/wrap_cl.c | 2156 ++++++++++++++++++++++++++++++++++ src/c_wrapper/wrap_cl.h | 73 ++ src/c_wrapper/wrap_cl_core.h | 34 + 6 files changed, 2798 insertions(+), 28 deletions(-) create mode 100644 pyopencl/cffi_cl.py create mode 100644 src/c_wrapper/Makefile create mode 100644 src/c_wrapper/wrap_cl.c create mode 100644 src/c_wrapper/wrap_cl.h create mode 100644 src/c_wrapper/wrap_cl_core.h diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 85791809..628443b9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -23,9 +23,9 @@ THE SOFTWARE. """ from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa - try: - import pyopencl._cl as _cl + import pyopencl.cffi_cl as _cl + #import pyopencl._cl as _cl except ImportError: import os from os.path import dirname, join, realpath @@ -35,9 +35,13 @@ except ImportError: "its source directory. This likely won't work.") raise - +# _ccl = _cl +# import cffi_cl +# _cl = cffi_cl + import numpy as np -from pyopencl._cl import * # noqa +#from pyopencl._cl import * # noqa +from pyopencl.cffi_cl import * import inspect as _inspect CONSTANT_CLASSES = [ @@ -45,7 +49,6 @@ CONSTANT_CLASSES = [ if _inspect.isclass(getattr(_cl, name)) and name[0].islower()] - class CompilerWarning(UserWarning): pass @@ -235,29 +238,32 @@ def link_program(context, programs, options=[], devices=None): # }}} - def _add_functionality(): cls_to_info_cls = { - _cl.Platform: - (_cl.Platform.get_info, _cl.platform_info), - _cl.Device: - (_cl.Device.get_info, _cl.device_info), - _cl.Context: - (_cl.Context.get_info, _cl.context_info), - _cl.CommandQueue: - (_cl.CommandQueue.get_info, _cl.command_queue_info), - _cl.Event: - (_cl.Event.get_info, _cl.event_info), - _cl.MemoryObjectHolder: - (MemoryObjectHolder.get_info, _cl.mem_info), + # PlatformZ: + # (PlatformZ.get_info, platform_info), + # DeviceZ: + # (DeviceZ.get_info, device_info), + Platform: + (Platform.get_info, platform_info), + Device: + (Device.get_info, device_info), + Context: + (Context.get_info, context_info), + CommandQueue: + (CommandQueue.get_info, command_queue_info), + Event: + (Event.get_info, event_info), + MemoryObjectHolder: + (MemoryObjectHolder.get_info, mem_info), Image: - (_cl.Image.get_image_info, _cl.image_info), + (Image.get_image_info, image_info), Program: - (Program.get_info, _cl.program_info), + (Program.get_info, program_info), Kernel: - (Kernel.get_info, _cl.kernel_info), - _cl.Sampler: - (Sampler.get_info, _cl.sampler_info), + (Kernel.get_info, kernel_info), + Sampler: + (Sampler.get_info, sampler_info), } def to_string(cls, value, default_format=None): @@ -286,16 +292,16 @@ def _add_functionality(): for info_name, info_value in info_class.__dict__.iteritems(): if info_name == "to_string" or info_name.startswith("_"): continue - setattr(cls, info_name.lower(), make_getinfo( info_method, getattr(info_class, info_name))) - # }}} # {{{ Platform def platform_repr(self): - return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) + return "<pyopencl.Platform '%s' at TODO>" % (self.name) + # TODO int_ptr + #return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) Platform.__repr__ = platform_repr @@ -304,8 +310,11 @@ def _add_functionality(): # {{{ Device def device_repr(self): - return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( - self.name.strip(), self.platform.name.strip(), self.int_ptr) + return "<pyopencl.Device '%s' on TODO at TODO>" % ( + self.name.strip()) + # TODO + # return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( + # self.name.strip(), self.platform.name.strip(), self.int_ptr) Device.__repr__ = device_repr diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py new file mode 100644 index 00000000..59ae52b5 --- /dev/null +++ b/pyopencl/cffi_cl.py @@ -0,0 +1,493 @@ + +from pyopencl._cl import device_info, context_info, command_queue_info, Event, event_info, mem_info, Image, image_info, program_info, Kernel, ImageFormat, GLBuffer, kernel_info, sampler_info, Sampler, have_gl, _enqueue_read_image, _enqueue_write_image, GLTexture, channel_type, _enqueue_copy_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, _enqueue_write_buffer, _enqueue_copy_buffer, get_cl_header_version, _enqueue_read_buffer_rect, _enqueue_write_buffer_rect, _enqueue_copy_buffer_rect, RuntimeError, program_kind, mem_object_type, Error, platform_info, device_type, mem_flags, LogicError + +import warnings + +import os.path +current_directory = os.path.dirname(__file__) + + + + +from cffi import FFI +_ffi = FFI() +_cl_header = """ + +/* cl.h */ +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +typedef cl_bitfield cl_command_queue_properties; +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +typedef cl_bitfield cl_mem_migration_flags; +typedef cl_uint cl_image_info; +typedef cl_uint cl_buffer_create_type; +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +typedef cl_uint cl_program_binary_type; +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +typedef cl_uint cl_kernel_work_group_info; +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; + +/* cl_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE ... +#define CL_MEM_WRITE_ONLY ... +#define CL_MEM_READ_ONLY ... +#define CL_MEM_USE_HOST_PTR ... +#define CL_MEM_ALLOC_HOST_PTR ... +#define CL_MEM_COPY_HOST_PTR ... +#define CL_MEM_HOST_WRITE_ONLY ... +#define CL_MEM_HOST_READ_ONLY ... +#define CL_MEM_HOST_NO_ACCESS ... + + +""" + +with open(os.path.join(current_directory, 'wrap_cl_core.h')) as _f: + _wrap_cl_header = _f.read() + +_ffi.cdef('%s\n%s' % (_cl_header, _wrap_cl_header)) +print current_directory +_lib = _ffi.verify( + """ + #include <wrap_cl.h> + """, + include_dirs=[os.path.join(current_directory, "../src/c_wrapper/")], + library_dirs=[current_directory], + libraries=["wrapcl", "OpenCL"]) + +class PP(object): + def __init__(self, ptr): + self.ptr = ptr + self.size = _ffi.new('uint32_t *') + + def __getitem__(self, key): + return self.ptr[0].__getitem__(key) + + def __iter__(self): + for i in xrange(self.size[0]): + yield self[i] + + def __del__(self): + _lib.freem(self.ptr[0]) + +class CLRuntimeError(RuntimeError): + def __init__(self, routine, code, msg=""): + super(CLRuntimeError, self).__init__(msg) + self.routine = routine + self.code = code + + +# plats = _ffi.new("void**") +# print _lib.get_platforms(plats) +# print _ffi.cast('int**', plats)[0][1] +# exit() + + + +# _platform_info_constants = { +# 'PROFILE': _lib.CL_PLATFORM_PROFILE, +# 'VERSION': _lib.CL_PLATFORM_VERSION, +# 'NAME': _lib.CL_PLATFORM_NAME, +# 'VENDOR': _lib.CL_PLATFORM_VENDOR, +# } + +# # re-implementation +# platform_info = type("platform_info", (NOINIT,), _platform_info_constants) + +# _device_type_constants = { +# 'DEFAULT': _lib.CL_DEVICE_TYPE_DEFAULT, +# 'CPU': _lib.CL_DEVICE_TYPE_CPU, +# 'GPU': _lib.CL_DEVICE_TYPE_GPU, +# 'ACCELERATOR': _lib.CL_DEVICE_TYPE_ACCELERATOR, +# 'ALL': _lib.CL_DEVICE_TYPE_ALL, +# } +# if _CL_VERSION >= (1, 2): +# _device_type_constants['CUSTOM'] = _lib.CL_DEVICE_TYPE_CUSTOM + +# # re-implementation +# device_type = type("device_type", (NOINIT,), _device_type_constants) + +# _mem_flags_constants = { +# 'READ_WRITE': _lib.CL_MEM_READ_WRITE, +# 'WRITE_ONLY': _lib.CL_MEM_WRITE_ONLY, +# 'READ_ONLY': _lib.CL_MEM_READ_ONLY, +# 'USE_HOST_PTR': _lib.CL_MEM_USE_HOST_PTR, +# 'ALLOC_HOST_PTR': _lib.CL_MEM_ALLOC_HOST_PTR, +# 'COPY_HOST_PTR': _lib.CL_MEM_COPY_HOST_PTR, +# # TODO_PLAT +# # #ifdef cl_amd_device_memory_flags +# # 'USE_PERSISTENT_MEM_AMD': _lib.CL_MEM_USE_PERSISTENT_MEM_AMD, +# # #endif +# } +# if _CL_VERSION >= (1, 2): +# _mem_flags_constants.update({ +# 'HOST_WRITE_ONLY': _lib.CL_MEM_HOST_WRITE_ONLY, +# 'HOST_READ_ONLY': _lib.CL_MEM_HOST_READ_ONLY, +# 'HOST_NO_ACCESS': _lib.CL_MEM_HOST_NO_ACCESS, +# }) + +# # re-implementation +# mem_flags = type("mem_flags", (NOINIT,), _mem_flags_constants) + +class EQUALITY_TESTS(object): + def __eq__(self, other): + return hash(self) == hash(other) + +class Device(EQUALITY_TESTS): + def __init__(self): + pass + + def __hash__(self): + return _lib.device__hash(self.ptr) + + # todo: __del__ + + def get_info(self, param): + if param == 4145: + return self.__dict__["platform"] # TODO HACK + value = _ffi.new('char **') + _lib.device__get_info(self.ptr, param, value) + return _ffi.string(value[0]) + +def _create_device(ptr): + device = Device() + device.ptr = ptr + return device + +def _parse_context_properties(properties): + props = [] + if properties is None: + return _ffi.NULL + + for prop_tuple in properties: + if len(prop_tuple) != 2: + raise CLRuntimeError("Context", _lib.CL_INVALID_VALUE, "property tuple must have length 2") + prop, value = prop_tuple + props.append(prop) + if prop == _lib.CL_CONTEXT_PLATFORM: + props.append(_ffi.cast('cl_context_properties', value.data())) + + else: # TODO_PLAT CL_WGL_HDC_KHR and morecc + raise CLRuntimeError("Context", _lib.CL_INVALID_VALUE, "invalid context property") + props.append(0) + return _ffi.new('cl_context_properties[]', props) + + +class Context(object): + def __init__(self, devices=None, properties=None, dev_type=None): + c_props = _parse_context_properties(properties) + status_code = _ffi.new('cl_int *') + + # from device list + if devices is not None: + if dev_type is not None: + raise CLRuntimeError("Context", _lib.CL_INVALID_VALUE, "one of 'devices' or 'dev_type' must be None") + ptr_devices = _ffi.new('cl_device_id[]', [device.ptr for device in devices]) + ptr_ctx = _ffi.new('void **') + _lib._create_context(ptr_ctx, c_props, len(ptr_devices), _ffi.cast('void**', ptr_devices)) + + else: # from dev_type + raise NotImplementedError() + + self.ptr = ptr_ctx[0] + + def get_info(self, param): + return 'TODO' + +class CommandQueue(object): + def __init__(self, context, device=None, properties=None): + if properties is None: + properties = 0 + ptr_command_queue = _ffi.new('void **') + _lib._create_command_queue(ptr_command_queue, context.ptr, _ffi.NULL if device is None else device.ptr, properties) + self.ptr = ptr_command_queue[0] + + def get_info(self, param): + print param + raise NotImplementedError() + +class MemoryObjectHolder(object): + def get_info(self, param): + info = _ffi.new('generic_info *') + _lib.memory_object_holder__get_info(self.ptr, param, info) + return _generic_info_to_python(info) + +class MemoryObject(MemoryObjectHolder): + pass + +class Buffer(MemoryObjectHolder): + def __init__(self, context, flags, size=0, hostbuf=None): + if hostbuf is not None and not (flags & (_lib.CL_MEM_USE_HOST_PTR | _lib.CL_MEM_COPY_HOST_PTR)): + warnings.warn("'hostbuf' was passed, but no memory flags to make use of it.") + c_hostbuf = _ffi.NULL + if hostbuf is not None: + # todo: buffer protocol; for now hostbuf is assumed to be a numpy array + c_hostbuf = _ffi.cast('void *', hostbuf.ctypes.data) + hostbuf_size = hostbuf.nbytes + if size > hostbuf_size: + raise CLRuntimeError("Buffer", _lib.CL_INVALID_VALUE, "specified size is greater than host buffer size") + if size == 0: + size = hostbuf_size + + ptr_buffer = _ffi.new('void **') + _lib._create_buffer(ptr_buffer, context.ptr, flags, size, c_hostbuf) + self.ptr = ptr_buffer[0] + +class _Program(object): + def __init__(self, *args): + if len(args) == 2: + self._init_source(*args) + else: + self._init_binary(*args) + + def int_ptr(self): + raise NotImplementedError() + + def from_int_ptr(self, int_ptr_value): + raise NotImplementedError() + + def _init_source(self, context, src): + ptr_program = _ffi.new('void **') + _lib._create_program_with_source(ptr_program, context.ptr, _ffi.new('char[]', src)) + self.ptr = ptr_program[0] + + def _init_binary(self, context, devices, binaries): + if len(devices) != len(binaries): + raise CLRuntimeError("create_program_with_binary", _lib.CL_INVALID_VALUE, "device and binary counts don't match") + + ptr_program = _ffi.new('void **') + ptr_devices = _ffi.new('void*[]', [device.ptr for device in devices]) + ptr_binaries = _ffi.new('char*[]', len(binaries)) + for i, binary in enumerate(binaries): + ptr_binaries[i] = _ffi.new('char[]', binary) + _lib._create_program_with_binary(ptr_program, context.ptr, len(ptr_devices), ptr_devices, len(ptr_binaries), ptr_binaries) + self.ptr = ptr_program[0] + + def kind(self): + kind = _ffi.new('int *') + _lib.program__kind(self.ptr, kind) + return kind[0] + + def _build(self, options=None, devices=None): + if devices is None: raise NotImplementedError() + # TODO: if devices is None, create them + if options is None: + options = "" + ptr_devices = _ffi.new('void*[]', [device.ptr for device in devices]) + _lib.program__build(self.ptr, _ffi.new('char[]', options), len(ptr_devices), _ffi.cast('void**', ptr_devices)) + + def get_info(self, param): + if param == program_info.DEVICES: + # todo: refactor, same code as in get_devices + devices = PP(_ffi.new('void**')) + _lib.program__get_info__devices(self.ptr, devices.ptr, devices.size) + result = [] + for i in xrange(devices.size[0]): + # TODO why is the cast needed? + device_ptr = _ffi.cast('void**', devices.ptr[0])[i] + result.append(_create_device(device_ptr)) + return result + elif param == program_info.BINARIES: + # TODO possible memory leak? the char arrays might not be freed + ptr_binaries = PP(_ffi.new('char***')) + _lib.program__get_info__binaries(self.ptr, ptr_binaries.ptr, ptr_binaries.size) + return map(_ffi.string, ptr_binaries) + print param + raise NotImplementedError() + +class Platform(object): + def __init__(self): + pass + + # todo: __del__ + + def get_info(self, param): + value = _ffi.new('char **') + _lib.platform__get_info(self.ptr, param, value) + return _ffi.string(value[0]) + + def get_devices(self, device_type=device_type.ALL): + devices = PP(_ffi.new('void**')) + _lib.platform__get_devices(self.ptr, devices.ptr, devices.size, device_type) + result = [] + for i in xrange(devices.size[0]): + # TODO why is the cast needed? + device_ptr = _ffi.cast('void**', devices.ptr[0])[i] + result.append(_create_device(device_ptr)) + # TODO remove, should be done via get_info(PLATFORM) + for r in result: + r.__dict__["platform"] = self + return result + +def _create_platform(ptr): + platform = Platform() + platform.ptr = ptr + return platform + +def _generic_info_to_python(info): + for type_ in ('cl_uint', + 'cl_mem_object_type', + ): + if info.type == getattr(_lib, 'generic_info_type_%s' % type_): + return getattr(info.value, '_%s' % type_) + raise NotImplementedError(info.type) + +class Kernel(object): + def __init__(self, program, name): + ptr_kernel = _ffi.new('void **') + _lib._create_kernel(ptr_kernel, program.ptr, name) + self.ptr = ptr_kernel[0] + + def get_info(self, param): + info = _ffi.new('generic_info *') + _lib.kernel__get_info(self.ptr, param, info) + return _generic_info_to_python(info) + #raise NotImplementedError() + + def set_arg(self, arg_index, arg): + if isinstance(arg, Buffer): + _lib.kernel__set_arg_mem_buffer(self.ptr, arg_index, arg.ptr) + else: + raise NotImplementedError() + +def get_platforms(): + platforms = PP(_ffi.new('void**')) + _lib.get_platforms(platforms.ptr, platforms.size) + result = [] + for i in xrange(platforms.size[0]): + # TODO why is the cast needed? + platform_ptr = _ffi.cast('void**', platforms.ptr[0])[i] + result.append(_create_platform(platform_ptr)) + + return result + +class Event(object): + def __init__(self): + pass + + def get_info(self, param): + print param + raise NotImplementedError() + +def _create_event(ptr): + event = Event() + event.ptr = ptr + return event + + +def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, global_work_offset=None, wait_for=None, g_times_l=False): + if wait_for is not None: + raise NotImplementedError("wait_for") + work_dim = len(global_work_size) + + if local_work_size is not None: + if g_times_l: + work_dim = max(work_dim, len(local_work_size)) + elif work_dim != len(local_work_size): + raise CLRuntimeError("enqueue_nd_range_kernel", _lib.CL_INVALID_VALUE, + "global/local work sizes have differing dimensions") + + local_work_size = list(local_work_size) + + if len(local_work_size) < work_dim: + local_work_size.extend([1] * (work_dim - len(local_work_size))) + if len(global_work_size) < work_dim: + global_work_size.extend([1] * (work_dim - len(global_work_size))) + + elif g_times_l: + for i in xrange(work_dim): + global_work_size[i] *= local_work_size[i] + + if global_work_offset is not None: + raise NotImplementedError("global_work_offset") + + c_global_work_offset = _ffi.NULL + c_global_work_size = _ffi.new('const size_t[]', global_work_size) + if local_work_size is None: + c_local_work_size = _ffi.NULL + else: + c_local_work_size = _ffi.new('const size_t[]', local_work_size) + + ptr_event = _ffi.new('void **') + _lib._enqueue_nd_range_kernel( + ptr_event, + queue.ptr, + kernel.ptr, + work_dim, + c_global_work_offset, + c_global_work_size, + c_local_work_size + ) + return _create_event(ptr_event[0]) + +def _enqueue_read_buffer(cq, mem, buf, device_offset=0, is_blocking=True): + # assume numpy + c_buf = _ffi.cast('void *', buf.ctypes.data) + size = buf.nbytes + ptr_event = _ffi.new('void **') + _lib._enqueue_read_buffer( + ptr_event, + cq.ptr, + mem.ptr, + c_buf, + size, + device_offset, + bool(is_blocking) + ) + return _create_event(ptr_event[0]) + + + diff --git a/src/c_wrapper/Makefile b/src/c_wrapper/Makefile new file mode 100644 index 00000000..7114b1a3 --- /dev/null +++ b/src/c_wrapper/Makefile @@ -0,0 +1,5 @@ +all: + g++ -c -Wall -DPYOPENCL_PRETEND_CL_VERSION=4112 -Werror -fpic wrap_cl.c + g++ -shared -o libwrapcl.so wrap_cl.o + cp libwrapcl.so ../../pyopencl/ + cp wrap_cl_core.h ../../pyopencl/wrap_cl_core.h diff --git a/src/c_wrapper/wrap_cl.c b/src/c_wrapper/wrap_cl.c new file mode 100644 index 00000000..e473f21c --- /dev/null +++ b/src/c_wrapper/wrap_cl.c @@ -0,0 +1,2156 @@ +#include "wrap_cl.h" +#include <stdlib.h> +#include <vector> +#include <iostream> +#include <stdexcept> +#include <string.h> +#include <memory> + +#define MALLOC(TYPE, VAR, N) TYPE* VAR = reinterpret_cast<TYPE*>(malloc(sizeof(TYPE)*(N))); + +// {{{ tracing and error reporting +#ifdef PYOPENCL_TRACE +#define PYOPENCL_PRINT_CALL_TRACE(NAME) \ + std::cerr << NAME << std::endl; +#define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \ + std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl; +#else +#define PYOPENCL_PRINT_CALL_TRACE(NAME) /*nothing*/ +#define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/ +#endif + +// TODO Py_BEGIN_ALLOW_THREADS \ Py_END_ALLOW_THREADS below +#define PYOPENCL_CALL_GUARDED_THREADED(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code); \ + } + + +#define PYOPENCL_CALL_GUARDED(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code); \ + } + +// }}} + +#define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ + { \ + size_t size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, &size)); \ + \ + RES_VEC.resize(size / sizeof(RES_VEC.front())); \ + \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, size, \ + RES_VEC.empty( ) ? NULL : &RES_VEC.front(), &size)); \ + } + +#define PYOPENCL_WAITLIST_ARGS \ + num_events_in_wait_list, event_wait_list.empty( ) ? NULL : &event_wait_list.front() + + +#define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \ + { \ + size_t param_value_size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, ¶m_value_size)); \ + \ + std::vector<char> param_value(param_value_size); \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, param_value_size, \ + param_value.empty( ) ? NULL : ¶m_value.front(), ¶m_value_size)); \ + \ + return param_value.empty( ) ? "" : std::string(¶m_value.front(), param_value_size-1); \ + } + +#define PYOPENCL_GET_INTEGRAL_INFO(WHAT, FIRST_ARG, SECOND_ARG, TYPE) \ + { \ + TYPE param_value; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ + generic_info info; \ + info.type = generic_info_type_##TYPE; \ + info.value._##TYPE = param_value; \ + return info; \ +} + + +#define PYOPENCL_RETURN_NEW_EVENT(evt) \ + try \ + { \ + return new event(evt, false); \ + } \ + catch (...) \ + { \ + clReleaseEvent(evt); \ + throw; \ + } + + +// {{{ equality testing +#define PYOPENCL_EQUALITY_TESTS(cls) \ + bool operator==(cls const &other) const \ + { return data() == other.data(); } \ + bool operator!=(cls const &other) const \ + { return data() != other.data(); } \ + long hash() const \ + { return (long) (intptr_t) data(); } +// }}} + + + +// {{{ tools +#define PYOPENCL_CAST_BOOL(B) ((B) ? CL_TRUE : CL_FALSE) + +#define PYOPENCL_PARSE_PY_DEVICES \ + std::vector<cl_device_id> devices_vec; \ + cl_uint num_devices; \ + cl_device_id *devices; \ + \ + if (py_devices.ptr() == Py_None) \ + { \ + num_devices = 0; \ + devices = 0; \ + } \ + else \ + { \ + PYTHON_FOREACH(py_dev, py_devices) \ + devices_vec.push_back( \ + py::extract<device &>(py_dev)().data()); \ + num_devices = devices_vec.size(); \ + devices = devices_vec.empty( ) ? NULL : &devices_vec.front(); \ + } \ + + + +#define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ + { \ + bool failed_with_mem_error = false; \ + try \ + { \ + OPERATION \ + } \ + catch (pyopencl::error &e) \ + { \ + failed_with_mem_error = true; \ + if (!e.is_out_of_memory()) \ + throw; \ + } \ + \ + if (failed_with_mem_error) \ + { \ + /* If we get here, we got an error from CL. + * We should run the Python GC to try and free up + * some memory references. */ \ + run_python_gc(); \ + \ + /* Now retry the allocation. If it fails again, + * let it fail. */ \ + { \ + OPERATION \ + } \ + } \ + } + +// }}} + + +int get_cl_version(void) { + return PYOPENCL_CL_VERSION; +} + +extern "C" +namespace pyopencl +{ + char* _copy_str(const std::string& str) { + MALLOC(char, cstr, str.size() + 1); + strcpy(cstr, str.c_str()); + return cstr; + } + + // {{{ error + class error : public std::runtime_error + { + private: + const char *m_routine; + cl_int m_code; + + public: + error(const char *rout, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(rout), m_code(c) + { std::cout << rout <<";" << msg<< ";" << c << std::endl; } + + const char *routine() const + { + return m_routine; + } + + cl_int code() const + { + return m_code; + } + + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + + }; + + // }}} + + + //#define MAKE_INFO(name, type, value) { } + + + // {{{ event/synchronization + class event // : boost::noncopyable + { + private: + cl_event m_event; + + public: + event(cl_event event, bool retain) + : m_event(event) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainEvent, (event)); + } + + event(event const &src) + : m_event(src.m_event) + { PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event)); } + + virtual ~event() + { + // todo + // PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent, + // (m_event)); + } + + const cl_event data() const + { return m_event; } + + PYOPENCL_EQUALITY_TESTS(event); + + // py::object get_info(cl_event_info param_name) const +// { +// switch (param_name) +// { +// case CL_EVENT_COMMAND_QUEUE: +// PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, +// cl_command_queue, command_queue); +// case CL_EVENT_COMMAND_TYPE: +// PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, +// cl_command_type); +// case CL_EVENT_COMMAND_EXECUTION_STATUS: +// PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, +// cl_int); +// case CL_EVENT_REFERENCE_COUNT: +// PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, +// cl_uint); +// #if PYOPENCL_CL_VERSION >= 0x1010 +// case CL_EVENT_CONTEXT: +// PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, +// cl_context, context); +// #endif + +// default: +// throw error("Event.get_info", CL_INVALID_VALUE); +// } +// } + + // py::object get_profiling_info(cl_profiling_info param_name) const + // { + // switch (param_name) + // { + // case CL_PROFILING_COMMAND_QUEUED: + // case CL_PROFILING_COMMAND_SUBMIT: + // case CL_PROFILING_COMMAND_START: + // case CL_PROFILING_COMMAND_END: + // PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name, + // cl_ulong); + // default: + // throw error("Event.get_profiling_info", CL_INVALID_VALUE); + // } + // } + + virtual void wait() + { + PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); + } + }; + + // }}} + + + + // {{{ platform + class platform + { + private: + cl_platform_id m_platform; + + public: + platform(cl_platform_id pid) + : m_platform(pid) + { } + + platform(cl_platform_id pid, bool /*retain (ignored)*/) + : m_platform(pid) + { } + + cl_platform_id data() const + { + return m_platform; + } + + // TODO + // PYOPENCL_EQUALITY_TESTS(platform); + + std::string get_info(cl_platform_info param_name) const + { + switch (param_name) + { + case CL_PLATFORM_PROFILE: + case CL_PLATFORM_VERSION: + case CL_PLATFORM_NAME: + case CL_PLATFORM_VENDOR: +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + case CL_PLATFORM_EXTENSIONS: +#endif + PYOPENCL_GET_STR_INFO(Platform, m_platform, param_name); + + default: + throw error("Platform.get_info", CL_INVALID_VALUE); + } + } + + std::vector<cl_device_id> get_devices(cl_device_type devtype); + }; + + + inline std::vector<cl_device_id> platform::get_devices(cl_device_type devtype) + { + cl_uint num_devices = 0; + PYOPENCL_CALL_GUARDED(clGetDeviceIDs, + (m_platform, devtype, 0, 0, &num_devices)); + + std::vector<cl_device_id> devices(num_devices); + PYOPENCL_CALL_GUARDED(clGetDeviceIDs, + (m_platform, devtype, + num_devices, devices.empty( ) ? NULL : &devices.front(), &num_devices)); + + return devices; + } + + // }}} + + // {{{ device + class device // : boost::noncopyable + { + public: + enum reference_type_t { + REF_NOT_OWNABLE, + REF_FISSION_EXT, +#if PYOPENCL_CL_VERSION >= 0x1020 + REF_CL_1_2, +#endif + }; + private: + cl_device_id m_device; + reference_type_t m_ref_type; + + public: + device(cl_device_id did) + : m_device(did), m_ref_type(REF_NOT_OWNABLE) + { } + + device(cl_device_id did, bool retain, reference_type_t ref_type=REF_NOT_OWNABLE) + : m_device(did), m_ref_type(ref_type) + { + if (retain && ref_type != REF_NOT_OWNABLE) + { + if (false) + { } +#if (defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION)) + else if (ref_type == REF_FISSION_EXT) + { +#if PYOPENCL_CL_VERSION >= 0x1020 + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); +#endif + + PYOPENCL_GET_EXT_FUN(plat, + clRetainDeviceEXT, retain_func); + + PYOPENCL_CALL_GUARDED(retain_func, (did)); + } +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + else if (ref_type == REF_CL_1_2) + { + PYOPENCL_CALL_GUARDED(clRetainDevice, (did)); + } +#endif + + else + throw error("Device", CL_INVALID_VALUE, + "cannot own references to devices when device fission or CL 1.2 is not available"); + } + } + + ~device() + { + if (false) + { } +#if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + else if (m_ref_type == REF_FISSION_EXT) + { +#if PYOPENCL_CL_VERSION >= 0x1020 + cl_platform_id plat; + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + sizeof(plat), &plat, NULL)); +#endif + + PYOPENCL_GET_EXT_FUN(plat, + clReleaseDeviceEXT, release_func); + + PYOPENCL_CALL_GUARDED_CLEANUP(release_func, (m_device)); + } +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + else if (m_ref_type == REF_CL_1_2) + PYOPENCL_CALL_GUARDED(clReleaseDevice, (m_device)); +#endif + } + + cl_device_id data() const + { + return m_device; + } + + PYOPENCL_EQUALITY_TESTS(device); + + std::string get_info(cl_device_info param_name) const + { +#define DEV_GET_INT_INF(TYPE) PYOPENCL_GET_INTEGRAL_INFO(Device, m_device, param_name, TYPE); + + switch (param_name) + { + // case CL_DEVICE_TYPE: DEV_GET_INT_INF(cl_device_type); + // case CL_DEVICE_VENDOR_ID: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_COMPUTE_UNITS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_WORK_GROUP_SIZE: DEV_GET_INT_INF(size_t); + + // case CL_DEVICE_MAX_WORK_ITEM_SIZES: + // { + // std::vector<size_t> result; + // PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + // PYOPENCL_RETURN_VECTOR(size_t, result); + // } + + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); + + // case CL_DEVICE_MAX_CLOCK_FREQUENCY: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_ADDRESS_BITS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_READ_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MAX_MEM_ALLOC_SIZE: DEV_GET_INT_INF(cl_ulong); + // case CL_DEVICE_IMAGE2D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE2D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE3D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE3D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE3D_MAX_DEPTH: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE_SUPPORT: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_MAX_PARAMETER_SIZE: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_MAX_SAMPLERS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MEM_BASE_ADDR_ALIGN: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_SINGLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); + // #ifdef CL_DEVICE_DOUBLE_FP_CONFIG + // case CL_DEVICE_DOUBLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); + // #endif + // #ifdef CL_DEVICE_HALF_FP_CONFIG + // case CL_DEVICE_HALF_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); + // #endif + + // case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: DEV_GET_INT_INF(cl_device_mem_cache_type); + // case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: DEV_GET_INT_INF(cl_ulong); + // case CL_DEVICE_GLOBAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + + // case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: DEV_GET_INT_INF(cl_ulong); + // case CL_DEVICE_MAX_CONSTANT_ARGS: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_LOCAL_MEM_TYPE: DEV_GET_INT_INF(cl_device_local_mem_type); + // case CL_DEVICE_LOCAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + // case CL_DEVICE_ERROR_CORRECTION_SUPPORT: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_PROFILING_TIMER_RESOLUTION: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_ENDIAN_LITTLE: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities); + // case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); + + case CL_DEVICE_NAME: + case CL_DEVICE_VENDOR: + case CL_DRIVER_VERSION: + case CL_DEVICE_PROFILE: + case CL_DEVICE_VERSION: + case CL_DEVICE_EXTENSIONS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + + // case CL_DEVICE_PLATFORM: + // PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_platform_id, platform); + + // #if PYOPENCL_CL_VERSION >= 0x1010 + // case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + // case CL_DEVICE_HOST_UNIFIED_MEMORY: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_OPENCL_C_VERSION: + // PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + // #endif + // #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + // case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: + // case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: + // case CL_DEVICE_REGISTERS_PER_BLOCK_NV: + // case CL_DEVICE_WARP_SIZE_NV: + // DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_GPU_OVERLAP_NV: + // case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: + // case CL_DEVICE_INTEGRATED_MEMORY_NV: + // DEV_GET_INT_INF(cl_bool); + // #endif + // #if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + // case CL_DEVICE_PARENT_DEVICE_EXT: + // PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); + // case CL_DEVICE_PARTITION_TYPES_EXT: + // case CL_DEVICE_AFFINITY_DOMAINS_EXT: + // case CL_DEVICE_PARTITION_STYLE_EXT: + // { + // std::vector<cl_device_partition_property_ext> result; + // PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + // PYOPENCL_RETURN_VECTOR(cl_device_partition_property_ext, result); + // } + // case CL_DEVICE_REFERENCE_COUNT_EXT: DEV_GET_INT_INF(cl_uint); + // #endif + // #if PYOPENCL_CL_VERSION >= 0x1020 + // case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_BUILT_IN_KERNELS: + // PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + // case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: DEV_GET_INT_INF(size_t); + // case CL_DEVICE_PARENT_DEVICE: + // PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); + // case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PARTITION_TYPE: + // case CL_DEVICE_PARTITION_PROPERTIES: + // { + // std::vector<cl_device_partition_property> result; + // PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + // PYOPENCL_RETURN_VECTOR(cl_device_partition_property, result); + // } + // case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: + // { + // std::vector<cl_device_affinity_domain> result; + // PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + // PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result); + // } + // case CL_DEVICE_REFERENCE_COUNT: DEV_GET_INT_INF(cl_uint); + // case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool); + // case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool); + // #endif + // // {{{ AMD dev attrs + // // + // // types of AMD dev attrs divined from + // // https://www.khronos.org/registry/cl/api/1.2/cl.hpp + // #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD + // case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: DEV_GET_INT_INF(cl_ulong); + // #endif + // /* FIXME + // #ifdef CL_DEVICE_TOPOLOGY_AMD + // case CL_DEVICE_TOPOLOGY_AMD: + // #endif + // */ + // #ifdef CL_DEVICE_BOARD_NAME_AMD + // case CL_DEVICE_BOARD_NAME_AMD: ; + // PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + // #endif + // #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD + // case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: + // { + // std::vector<size_t> result; + // PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + // PYOPENCL_RETURN_VECTOR(size_t, result); + // } + // #endif + // #ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD + // case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_SIMD_WIDTH_AMD + // case CL_DEVICE_SIMD_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD + // case CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD + // case CL_DEVICE_WAVEFRONT_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD + // case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD + // case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD + // case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD + // case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD + // case CL_DEVICE_LOCAL_MEM_BANKS_AMD: DEV_GET_INT_INF(cl_uint); + // #endif + // // }}} + + // #ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT + // case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint); + // #endif + + default: + throw error("Device.get_info", CL_INVALID_VALUE); + } + } + + // #if PYOPENCL_CL_VERSION >= 0x1020 + // py::list create_sub_devices(py::object py_properties) + // { + // std::vector<cl_device_partition_property> properties; + + // COPY_PY_LIST(cl_device_partition_property, properties); + // properties.push_back(0); + + // cl_device_partition_property *props_ptr + // = properties.empty( ) ? NULL : &properties.front(); + + // cl_uint num_entries; + // PYOPENCL_CALL_GUARDED(clCreateSubDevices, + // (m_device, props_ptr, 0, NULL, &num_entries)); + + // std::vector<cl_device_id> result; + // result.resize(num_entries); + + // PYOPENCL_CALL_GUARDED(clCreateSubDevices, + // (m_device, props_ptr, num_entries, &result.front(), NULL)); + + // py::list py_result; + // BOOST_FOREACH(cl_device_id did, result) + // py_result.append(handle_from_new_ptr( + // new pyopencl::device(did, /*retain*/true, + // device::REF_CL_1_2))); + // return py_result; + // } + // #endif + + // #if defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + // py::list create_sub_devices_ext(py::object py_properties) + // { + // std::vector<cl_device_partition_property_ext> properties; + + // #if PYOPENCL_CL_VERSION >= 0x1020 + // cl_platform_id plat; + // PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (m_device, CL_DEVICE_PLATFORM, + // sizeof(plat), &plat, NULL)); + // #endif + + // PYOPENCL_GET_EXT_FUN(plat, clCreateSubDevicesEXT, create_sub_dev); + + // COPY_PY_LIST(cl_device_partition_property_ext, properties); + // properties.push_back(CL_PROPERTIES_LIST_END_EXT); + + // cl_device_partition_property_ext *props_ptr + // = properties.empty( ) ? NULL : &properties.front(); + + // cl_uint num_entries; + // PYOPENCL_CALL_GUARDED(create_sub_dev, + // (m_device, props_ptr, 0, NULL, &num_entries)); + + // std::vector<cl_device_id> result; + // result.resize(num_entries); + + // PYOPENCL_CALL_GUARDED(create_sub_dev, + // (m_device, props_ptr, num_entries, &result.front(), NULL)); + + // py::list py_result; + // BOOST_FOREACH(cl_device_id did, result) + // py_result.append(handle_from_new_ptr( + // new pyopencl::device(did, /*retain*/true, + // device::REF_FISSION_EXT))); + // return py_result; + // } + // #endif + + + }; + + // }}} + + + + // {{{ context + class context // : public boost::noncopyable + { + private: + cl_context m_context; + + public: + context(cl_context ctx, bool retain) + : m_context(ctx) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainContext, (ctx)); + } + + + ~context() + { + // TODO + // PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext, + // (m_context)); + } + + cl_context data() const + { + return m_context; + } + + //PYOPENCL_EQUALITY_TESTS(context); + + // py::object get_info(cl_context_info param_name) const + // { + // switch (param_name) + // { + // case CL_CONTEXT_REFERENCE_COUNT: + // PYOPENCL_GET_INTEGRAL_INFO( + // Context, m_context, param_name, cl_uint); + + // case CL_CONTEXT_DEVICES: + // { + // std::vector<cl_device_id> result; + // PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + // py::list py_result; + // BOOST_FOREACH(cl_device_id did, result) + // py_result.append(handle_from_new_ptr( + // new pyopencl::device(did))); + // return py_result; + // } + + // case CL_CONTEXT_PROPERTIES: + // { + // std::vector<cl_context_properties> result; + // PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + // py::list py_result; + // for (size_t i = 0; i < result.size(); i+=2) + // { + // cl_context_properties key = result[i]; + // py::object value; + // switch (key) + // { + // case CL_CONTEXT_PLATFORM: + // { + // value = py::object( + // handle_from_new_ptr(new platform( + // reinterpret_cast<cl_platform_id>(result[i+1])))); + // break; + // } + + // #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) + // #if defined(__APPLE__) && defined(HAVE_GL) + // case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: + // #else + // case CL_GL_CONTEXT_KHR: + // case CL_EGL_DISPLAY_KHR: + // case CL_GLX_DISPLAY_KHR: + // case CL_WGL_HDC_KHR: + // case CL_CGL_SHAREGROUP_KHR: + // #endif + // value = py::object(result[i+1]); + // break; + + // #endif + // case 0: + // break; + + // default: + // throw error("Context.get_info", CL_INVALID_VALUE, + // "unknown context_property key encountered"); + // } + + // py_result.append(py::make_tuple(result[i], value)); + // } + // return py_result; + // } + + // #if PYOPENCL_CL_VERSION >= 0x1010 + // case CL_CONTEXT_NUM_DEVICES: + // PYOPENCL_GET_INTEGRAL_INFO( + // Context, m_context, param_name, cl_uint); + // #endif + + // default: + // throw error("Context.get_info", CL_INVALID_VALUE); + // } + // } + // }; + + + + // }}} + + }; + + // {{{ command_queue + class command_queue + { + private: + cl_command_queue m_queue; + + public: + command_queue(cl_command_queue q, bool retain) + : m_queue(q) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q)); + } + + command_queue(command_queue const &src) + : m_queue(src.m_queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue( + const context &ctx, + const device *py_dev=0, + cl_command_queue_properties props=0) + { + cl_device_id dev; + if (py_dev) + dev = py_dev->data(); + else + { + // TODO + // std::vector<cl_device_id> devs; + // PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs); + // if (devs.size() == 0) + // throw pyopencl::error("CommandQueue", CL_INVALID_VALUE, + // "context doesn't have any devices? -- don't know which one to default to"); + // dev = devs[0]; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); + m_queue = clCreateCommandQueue( + ctx.data(), dev, props, &status_code); + + if (status_code != CL_SUCCESS) { + throw pyopencl::error("CommandQueue", status_code); + } + } + + ~command_queue() + { + // TODO + // PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, + // (m_queue)); + } + + const cl_command_queue data() const + { return m_queue; } + + // PYOPENCL_EQUALITY_TESTS(command_queue); + + // py::object get_info(cl_command_queue_info param_name) const + // { + // switch (param_name) + // { + // case CL_QUEUE_CONTEXT: + // PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + // cl_context, context); + // case CL_QUEUE_DEVICE: + // PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + // cl_device_id, device); + // case CL_QUEUE_REFERENCE_COUNT: + // PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + // cl_uint); + // case CL_QUEUE_PROPERTIES: + // PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + // cl_command_queue_properties); + + // default: + // throw error("CommandQueue.get_info", CL_INVALID_VALUE); + // } + // } + + std::auto_ptr<context> get_context() const + { + cl_context param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), ¶m_value, 0)); + return std::auto_ptr<context>( + new context(param_value, /*retain*/ true)); + } + +#if PYOPENCL_CL_VERSION < 0x1010 + cl_command_queue_properties set_property( + cl_command_queue_properties prop, + bool enable) + { + cl_command_queue_properties old_prop; + PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty, + (m_queue, prop, PYOPENCL_CAST_BOOL(enable), &old_prop)); + return old_prop; + } +#endif + + void flush() + { PYOPENCL_CALL_GUARDED(clFlush, (m_queue)); } + void finish() + { + // TODO + // PYOPENCL_CALL_GUARDED_THREADED(clFinish, (m_queue)); + } + }; + + // }}} + + + // {{{ memory_object + + //py::object create_mem_object_wrapper(cl_mem mem); + + class memory_object_holder + { + public: + virtual const cl_mem data() const = 0; + + //PYOPENCL_EQUALITY_TESTS(memory_object_holder); + + size_t size() const + { + size_t param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(param_value), ¶m_value, 0)); + return param_value; + } + + generic_info get_info(cl_mem_info param_name) { + switch (param_name){ + case CL_MEM_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_mem_object_type); +// case CL_MEM_FLAGS: +// PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, +// cl_mem_flags); +// case CL_MEM_SIZE: +// PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, +// size_t); +// case CL_MEM_HOST_PTR: +// throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE, +// "Use MemoryObject.get_host_array to get host pointer."); +// case CL_MEM_MAP_COUNT: +// PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, +// cl_uint); +// case CL_MEM_REFERENCE_COUNT: +// PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, +// cl_uint); +// case CL_MEM_CONTEXT: +// PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name, +// cl_context, context); + +// #if PYOPENCL_CL_VERSION >= 0x1010 +// case CL_MEM_ASSOCIATED_MEMOBJECT: +// { +// cl_mem param_value; +// PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, (data(), param_name, sizeof(param_value), ¶m_value, 0)); +// if (param_value == 0) +// { +// // no associated memory object? no problem. +// return py::object(); +// } + +// return create_mem_object_wrapper(param_value); +// } +// case CL_MEM_OFFSET: +// PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, +// size_t); +// #endif + + default: + throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE); + } + } + }; + + + class memory_object : /*boost::noncopyable, */ public memory_object_holder + { + private: + bool m_valid; + cl_mem m_mem; + void* m_hostbuf; + + public: + memory_object(cl_mem mem, bool retain, void* hostbuf=0) + : m_valid(true), m_mem(mem) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem)); + + if (hostbuf) + m_hostbuf = hostbuf; + } + + memory_object(memory_object const &src) + : m_valid(true), m_mem(src.m_mem), m_hostbuf(src.m_hostbuf) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + memory_object(memory_object_holder const &src) + : m_valid(true), m_mem(src.data()) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + void release() + { + if (!m_valid) + throw error("MemoryObject.free", CL_INVALID_VALUE, + "trying to double-unref mem object"); + // TODO + //PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem)); + m_valid = false; + } + + virtual ~memory_object() + { + if (m_valid) + release(); + } + + void* hostbuf() + { return m_hostbuf; } + + const cl_mem data() const + { return m_mem; } + + }; + +// #if PYOPENCL_CL_VERSION >= 0x1020 +// inline +// event *enqueue_migrate_mem_objects( +// command_queue &cq, +// py::object py_mem_objects, +// cl_mem_migration_flags flags, +// py::object py_wait_for) +// { +// PYOPENCL_PARSE_WAIT_FOR; + +// std::vector<cl_mem> mem_objects; +// PYTHON_FOREACH(mo, py_mem_objects) +// mem_objects.push_back(py::extract<memory_object &>(mo)().data()); + +// cl_event evt; +// PYOPENCL_RETRY_IF_MEM_ERROR( +// PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, ( +// cq.data(), +// mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), +// flags, +// PYOPENCL_WAITLIST_ARGS, &evt +// )); +// ); +// PYOPENCL_RETURN_NEW_EVENT(evt); +// } +// #endif + +// #ifdef cl_ext_migrate_memobject +// inline +// event *enqueue_migrate_mem_object_ext( +// command_queue &cq, +// py::object py_mem_objects, +// cl_mem_migration_flags_ext flags, +// py::object py_wait_for) +// { +// PYOPENCL_PARSE_WAIT_FOR; + +// #if PYOPENCL_CL_VERSION >= 0x1020 +// // {{{ get platform +// cl_device_id dev; +// PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (cq.data(), CL_QUEUE_DEVICE, +// sizeof(dev), &dev, NULL)); +// cl_platform_id plat; +// PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (cq.data(), CL_DEVICE_PLATFORM, +// sizeof(plat), &plat, NULL)); +// // }}} +// #endif + +// PYOPENCL_GET_EXT_FUN(plat, +// clEnqueueMigrateMemObjectEXT, enqueue_migrate_fn); + +// std::vector<cl_mem> mem_objects; +// PYTHON_FOREACH(mo, py_mem_objects) +// mem_objects.push_back(py::extract<memory_object &>(mo)().data()); + +// cl_event evt; +// PYOPENCL_RETRY_IF_MEM_ERROR( +// PYOPENCL_CALL_GUARDED(enqueue_migrate_fn, ( +// cq.data(), +// mem_objects.size(), mem_objects.empty( ) ? NULL : &mem_objects.front(), +// flags, +// PYOPENCL_WAITLIST_ARGS, &evt +// )); +// ); +// PYOPENCL_RETURN_NEW_EVENT(evt); +// } +// #endif + + // }}} + + + // {{{ buffer + + inline cl_mem create_buffer( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); + cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("create_buffer", status_code); + + return mem; + } + + + + inline cl_mem create_buffer_gc(cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + // TODO + //PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_buffer(ctx, flags, size, host_ptr); + // ); + } + + + +#if PYOPENCL_CL_VERSION >= 0x1010 + inline cl_mem create_sub_buffer( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer"); + cl_mem mem = clCreateSubBuffer(buffer, flags, + bct, buffer_create_info, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateSubBuffer", status_code); + + return mem; + } + + + + + inline cl_mem create_sub_buffer_gc( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + // TODO + //PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_sub_buffer(buffer, flags, bct, buffer_create_info); + //); + } +#endif + +class buffer : public memory_object + { + public: + buffer(cl_mem mem, bool retain, void *hostbuf=0) + : memory_object(mem, retain, hostbuf) + { } + +// #if PYOPENCL_CL_VERSION >= 0x1010 +// buffer *get_sub_region( +// size_t origin, size_t size, cl_mem_flags flags) const +// { +// cl_buffer_region region = { origin, size}; + +// cl_mem mem = create_sub_buffer_gc( +// data(), flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion); + +// try +// { +// return new buffer(mem, false); +// } +// catch (...) +// { +// PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); +// throw; +// } +// } + +// buffer *getitem(py::slice slc) const +// { +// PYOPENCL_BUFFER_SIZE_T start, end, stride, length; + +// size_t my_length; +// PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, +// (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0)); + +// #if PY_VERSION_HEX >= 0x03020000 +// if (PySlice_GetIndicesEx(slc.ptr(), +// #else +// if (PySlice_GetIndicesEx(reinterpret_cast<PySliceObject *>(slc.ptr()), +// #endif +// my_length, &start, &end, &stride, &length) != 0) +// throw py::error_already_set(); + +// if (stride != 1) +// throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, +// "Buffer slice must have stride 1"); + +// cl_mem_flags my_flags; +// PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, +// (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0)); + +// return get_sub_region(start, end, my_flags); +// } +// #endif + }; + + // {{{ buffer creation + + inline + buffer *create_buffer_py( + context &ctx, + cl_mem_flags flags, + size_t size, + void* py_hostbuf + ) + { + + void *buf = py_hostbuf; + void *retained_buf_obj = 0; + if (py_hostbuf != NULL) + { + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = py_hostbuf; + + } + + cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf); + + try + { + return new buffer(mem, false, retained_buf_obj); + } + catch (...) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + + // }}} + + + // {{{ program + + class program //: boost::noncopyable + { + public: + enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY }; + + private: + cl_program m_program; + program_kind_type m_program_kind; + + public: + program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN) + : m_program(prog), m_program_kind(progkind) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainProgram, (prog)); + } + + ~program() + { + // TODO + //PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program)); + } + + cl_program data() const + { + return m_program; + } + + program_kind_type kind() const + { + return m_program_kind; + } + + //PYOPENCL_EQUALITY_TESTS(program); + + std::vector<cl_device_id> get_info__devices() + { + std::vector<cl_device_id> result; + PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_DEVICES, result); + return result; + } + + char** get_info__binaries(uint32_t *num_binaries) { + std::vector<size_t> sizes; + PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes); + + *num_binaries = sizes.size(); + + MALLOC(char *, result_ptrs, sizes.size()); + + for (unsigned i = 0; i < sizes.size(); ++i) { + result_ptrs[i] = new char[sizes[i]+1]; + result_ptrs[i][sizes[i]] = '\0'; + } + PYOPENCL_CALL_GUARDED(clGetProgramInfo, + (m_program, CL_PROGRAM_BINARIES, sizes.size()*sizeof(char *), + result_ptrs, 0)); \ + return result_ptrs; +// py::list py_result; +// ptr = result.get(); +// for (unsigned i = 0; i < sizes.size(); ++i) { +// py::handle<> binary_pyobj( +// #if PY_VERSION_HEX >= 0x03000000 +// PyBytes_FromStringAndSize( +// reinterpret_cast<char *>(ptr), sizes[i]) +// #else +// PyString_FromStringAndSize( +// reinterpret_cast<char *>(ptr), sizes[i]) +// #endif +// ); +// py_result.append(binary_pyobj); +// ptr += sizes[i]; + } + + + //py::object get_info(cl_program_info param_name) const +// { +// switch (param_name) +// { +// case CL_PROGRAM_REFERENCE_COUNT: +// PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, +// cl_uint); +// case CL_PROGRAM_CONTEXT: +// PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name, +// cl_context, context); +// case CL_PROGRAM_NUM_DEVICES: +// PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, +// cl_uint); +// case CL_PROGRAM_DEVICES: +// { +// std::vector<cl_device_id> result; +// PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); + +// py::list py_result; +// BOOST_FOREACH(cl_device_id did, result) +// py_result.append(handle_from_new_ptr( +// new pyopencl::device(did))); +// return py_result; +// } +// case CL_PROGRAM_SOURCE: +// PYOPENCL_GET_STR_INFO(Program, m_program, param_name); +// case CL_PROGRAM_BINARY_SIZES: +// { +// std::vector<size_t> result; +// PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); +// PYOPENCL_RETURN_VECTOR(size_t, result); +// } +// case CL_PROGRAM_BINARIES: +// // {{{ +// { +// std::vector<size_t> sizes; +// PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes); + +// size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); + +// boost::scoped_array<unsigned char> result( +// new unsigned char[total_size]); +// std::vector<unsigned char *> result_ptrs; + +// unsigned char *ptr = result.get(); +// for (unsigned i = 0; i < sizes.size(); ++i) +// { +// result_ptrs.push_back(ptr); +// ptr += sizes[i]; +// } + +// PYOPENCL_CALL_GUARDED(clGetProgramInfo, +// (m_program, param_name, sizes.size()*sizeof(unsigned char *), +// result_ptrs.empty( ) ? NULL : &result_ptrs.front(), 0)); + +// py::list py_result; +// ptr = result.get(); +// for (unsigned i = 0; i < sizes.size(); ++i) +// { +// py::handle<> binary_pyobj( +// #if PY_VERSION_HEX >= 0x03000000 +// PyBytes_FromStringAndSize( +// reinterpret_cast<char *>(ptr), sizes[i]) +// #else +// PyString_FromStringAndSize( +// reinterpret_cast<char *>(ptr), sizes[i]) +// #endif +// ); +// py_result.append(binary_pyobj); +// ptr += sizes[i]; +// } +// return py_result; +// } +// // }}} +// #if PYOPENCL_CL_VERSION >= 0x1020 +// case CL_PROGRAM_NUM_KERNELS: +// PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, +// size_t); +// case CL_PROGRAM_KERNEL_NAMES: +// PYOPENCL_GET_STR_INFO(Program, m_program, param_name); +// #endif + +// default: +// throw error("Program.get_info", CL_INVALID_VALUE); +// } +// } + +// py::object get_build_info( +// device const &dev, +// cl_program_build_info param_name) const +// { +// switch (param_name) +// { +// #define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack +// case CL_PROGRAM_BUILD_STATUS: +// PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, +// PYOPENCL_FIRST_ARG, param_name, +// cl_build_status); +// case CL_PROGRAM_BUILD_OPTIONS: +// case CL_PROGRAM_BUILD_LOG: +// PYOPENCL_GET_STR_INFO(ProgramBuild, +// PYOPENCL_FIRST_ARG, param_name); +// #if PYOPENCL_CL_VERSION >= 0x1020 +// case CL_PROGRAM_BINARY_TYPE: +// PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, +// PYOPENCL_FIRST_ARG, param_name, +// cl_program_binary_type); +// #endif +// #undef PYOPENCL_FIRST_ARG + +// default: +// throw error("Program.get_build_info", CL_INVALID_VALUE); +// } +// } + + void build(char* options, cl_uint num_devices, void** ptr_devices) + { + // todo: this function should get a list of device instances, not raw pointers + // pointers are for the cffi interface and should not be here + std::vector<cl_device_id> devices(num_devices); + for(cl_uint i = 0; i < num_devices; ++i) { + devices[i] = static_cast<device*>(ptr_devices[i])->data(); + } + PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram, + (m_program, num_devices, devices.empty( ) ? NULL : &devices.front(), + options, 0 ,0)); + } + +// #if PYOPENCL_CL_VERSION >= 0x1020 +// void compile(std::string options, py::object py_devices, +// py::object py_headers) +// { +// PYOPENCL_PARSE_PY_DEVICES; + +// // {{{ pick apart py_headers +// // py_headers is a list of tuples *(name, program)* + +// std::vector<std::string> header_names; +// std::vector<cl_program> programs; +// PYTHON_FOREACH(name_hdr_tup, py_headers) +// { +// if (py::len(name_hdr_tup) != 2) +// throw error("Program.compile", CL_INVALID_VALUE, +// "epxected (name, header) tuple in headers list"); +// std::string name = py::extract<std::string const &>(name_hdr_tup[0]); +// program &prg = py::extract<program &>(name_hdr_tup[1]); + +// header_names.push_back(name); +// programs.push_back(prg.data()); +// } + +// std::vector<const char *> header_name_ptrs; +// BOOST_FOREACH(std::string const &name, header_names) +// header_name_ptrs.push_back(name.c_str()); + +// // }}} + +// PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram, +// (m_program, num_devices, devices, +// options.c_str(), header_names.size(), +// programs.empty() ? NULL : &programs.front(), +// header_name_ptrs.empty() ? NULL : &header_name_ptrs.front(), +// 0, 0)); +// } +// #endif + }; + + + + // {{{ kernel + class local_memory + { + private: + size_t m_size; + + public: + local_memory(size_t size) + : m_size(size) + { } + + size_t size() const + { return m_size; } + }; + + + + + class kernel // : boost::noncopyable + { + private: + cl_kernel m_kernel; + + public: + kernel(cl_kernel knl, bool retain) + : m_kernel(knl) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainKernel, (knl)); + } + + kernel(program const &prg, std::string const &kernel_name) + { + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateKernel"); + m_kernel = clCreateKernel(prg.data(), kernel_name.c_str(), + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateKernel", status_code); + } + + ~kernel() + { + // todo + //PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel)); + } + + cl_kernel data() const + { + return m_kernel; + } + + PYOPENCL_EQUALITY_TESTS(kernel); + + void set_arg_null(cl_uint arg_index) + { + cl_mem m = 0; + PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, + sizeof(cl_mem), &m)); + } + + void set_arg_mem(cl_uint arg_index, memory_object_holder &moh) + { + cl_mem m = moh.data(); + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, sizeof(cl_mem), &m)); + } + + void set_arg_local(cl_uint arg_index, local_memory const &loc) + { + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, loc.size(), 0)); + } + + // void set_arg_sampler(cl_uint arg_index, sampler const &smp) + // { + // cl_sampler s = smp.data(); + // PYOPENCL_CALL_GUARDED(clSetKernelArg, + // (m_kernel, arg_index, sizeof(cl_sampler), &s)); + // } + + // void set_arg_buf(cl_uint arg_index, py::object py_buffer) + // { + // const void *buf; + // PYOPENCL_BUFFER_SIZE_T len; + + // if (PyObject_AsReadBuffer(py_buffer.ptr(), &buf, &len)) + // { + // PyErr_Clear(); + // throw error("Kernel.set_arg", CL_INVALID_VALUE, + // "invalid kernel argument"); + // } + + // PYOPENCL_CALL_GUARDED(clSetKernelArg, + // (m_kernel, arg_index, len, buf)); + // } + + // void set_arg(cl_uint arg_index, py::object arg) + // { + // if (arg.ptr() == Py_None) + // { + // set_arg_null(arg_index); + // return; + // } + + // py::extract<memory_object_holder &> ex_mo(arg); + // if (ex_mo.check()) + // { + // set_arg_mem(arg_index, ex_mo()); + // return; + // } + + // py::extract<local_memory const &> ex_loc(arg); + // if (ex_loc.check()) + // { + // set_arg_local(arg_index, ex_loc()); + // return; + // } + + // py::extract<sampler const &> ex_smp(arg); + // if (ex_smp.check()) + // { + // set_arg_sampler(arg_index, ex_smp()); + // return; + // } + + // set_arg_buf(arg_index, arg); + // } + + generic_info get_info(cl_kernel_info param_name) const + { + switch (param_name) + { +// case CL_KERNEL_FUNCTION_NAME: +// PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); + case CL_KERNEL_NUM_ARGS: + case CL_KERNEL_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name, + cl_uint); +// case CL_KERNEL_CONTEXT: +// PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, +// cl_context, context); +// case CL_KERNEL_PROGRAM: +// PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, +// cl_program, program); +// #if PYOPENCL_CL_VERSION >= 0x1020 +// case CL_KERNEL_ATTRIBUTES: +// PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); +// #endif + default: + throw error("Kernel.get_info", CL_INVALID_VALUE); + } + } + +// py::object get_work_group_info( +// cl_kernel_work_group_info param_name, +// device const &dev +// ) const +// { +// switch (param_name) +// { +// #define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack +// case CL_KERNEL_WORK_GROUP_SIZE: +// PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, +// PYOPENCL_FIRST_ARG, param_name, +// size_t); +// case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: +// { +// std::vector<size_t> result; +// PYOPENCL_GET_VEC_INFO(KernelWorkGroup, +// PYOPENCL_FIRST_ARG, param_name, result); + +// PYOPENCL_RETURN_VECTOR(size_t, result); +// } +// case CL_KERNEL_LOCAL_MEM_SIZE: +// #if PYOPENCL_CL_VERSION >= 0x1010 +// case CL_KERNEL_PRIVATE_MEM_SIZE: +// #endif +// PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, +// PYOPENCL_FIRST_ARG, param_name, +// cl_ulong); + +// #if PYOPENCL_CL_VERSION >= 0x1010 +// case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: +// PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, +// PYOPENCL_FIRST_ARG, param_name, +// size_t); +// #endif +// default: +// throw error("Kernel.get_work_group_info", CL_INVALID_VALUE); +// #undef PYOPENCL_FIRST_ARG +// } +// } + +// #if PYOPENCL_CL_VERSION >= 0x1020 +// py::object get_arg_info( +// cl_uint arg_index, +// cl_kernel_arg_info param_name +// ) const +// { +// switch (param_name) +// { +// #define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack +// case CL_KERNEL_ARG_ADDRESS_QUALIFIER: +// PYOPENCL_GET_INTEGRAL_INFO(KernelArg, +// PYOPENCL_FIRST_ARG, param_name, +// cl_kernel_arg_address_qualifier); + +// case CL_KERNEL_ARG_ACCESS_QUALIFIER: +// PYOPENCL_GET_INTEGRAL_INFO(KernelArg, +// PYOPENCL_FIRST_ARG, param_name, +// cl_kernel_arg_access_qualifier); + +// case CL_KERNEL_ARG_TYPE_NAME: +// case CL_KERNEL_ARG_NAME: +// PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name); +// #undef PYOPENCL_FIRST_ARG +// default: +// throw error("Kernel.get_arg_info", CL_INVALID_VALUE); +// } +// } +// #endif + }; + + + // {{{ buffer transfers + + inline + event *enqueue_read_buffer( + command_queue &cq, + memory_object_holder &mem, + void* buffer, + size_t size, + size_t device_offset, + /*py::object py_wait_for,*/ + bool is_blocking) + { + // TODO + //PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + // TODO + //PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, + (cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + device_offset, size, buffer, + 0, NULL, + //PYOPENCL_WAITLIST_ARGS, + &evt + )); + //); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + inline + event *enqueue_copy_buffer(command_queue &cq, + memory_object_holder &src, + memory_object_holder &dst, + ptrdiff_t byte_count, + size_t src_offset, + size_t dst_offset + // , + /*py::object py_wait_for*/ + ) + { + // TODO + // PYOPENCL_PARSE_WAIT_FOR; + + if (byte_count < 0) + { + size_t byte_count_src = 0; + size_t byte_count_dst = 0; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0)); + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0)); + byte_count = std::min(byte_count_src, byte_count_dst); + } + + cl_event evt; + // TODO + //PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, (cq.data(), + src.data(), dst.data(), + src_offset, dst_offset, + byte_count, + 0, NULL, //PYOPENCL_WAITLIST_ARGS, + &evt + )) + // ); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + // }}} + +inline event *enqueue_nd_range_kernel( + command_queue &cq, + kernel &knl, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size //, + //py::object py_global_work_offset, + //py::object py_wait_for, + ) + { + // TODO + // PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + + // TODO: PYOPENCL_RETRY_RETURN_IF_MEM_ERROR + PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, + (cq.data(), + knl.data(), + work_dim, + global_work_offset, + global_work_size, + local_work_size, + 0, NULL,// PYOPENCL_WAITLIST_ARGS, + &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + + } + + + + // }}} + inline + program *create_program_with_source( + context &ctx, + std::string const &src) + { + const char *string = src.c_str(); + size_t length = src.size(); + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource"); + cl_program result = clCreateProgramWithSource( + ctx.data(), 1, &string, &length, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithSource", status_code); + + try + { + return new program(result, false, program::KND_SOURCE); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } + + inline + program *create_program_with_binary( + context &ctx, + cl_uint num_devices, + void** ptr_devices, + cl_uint num_binaries, + char** binaries) + { + std::vector<cl_device_id> devices; + std::vector<size_t> sizes; + std::vector<cl_int> binary_statuses; + + for (cl_uint i = 0; i < num_devices; ++i) + { + devices.push_back(static_cast<device*>(ptr_devices[i])->data()); + sizes.push_back(strlen(const_cast<const char*>(binaries[i]))); + } + binary_statuses.resize(num_devices); + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); + cl_program result = clCreateProgramWithBinary( + ctx.data(), num_devices, + devices.empty( ) ? NULL : &devices.front(), + sizes.empty( ) ? NULL : &sizes.front(), + reinterpret_cast<const unsigned char**>(const_cast<const char**>(binaries)), // todo: valid cast? + binary_statuses.empty( ) ? NULL : &binary_statuses.front(), + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithBinary", status_code); + + // for (cl_uint i = 0; i < num_devices; ++i) + // std::cout << i << ":" << binary_statuses[i] << std::endl; + + try + { + return new program(result, false, program::KND_BINARY); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } + + + + + + + void* get_platforms(void** ptr_platforms, uint32_t *num_platforms) { + *num_platforms = 0; + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, (0, 0, num_platforms)); + + typedef std::vector<cl_platform_id> vec; + vec platforms(*num_platforms); + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, + (*num_platforms, platforms.empty( ) ? NULL : &platforms.front(), num_platforms)); + + MALLOC(platform*, _ptr_platforms, *num_platforms); + for(vec::size_type i = 0; i < platforms.size(); ++i) { + _ptr_platforms[i] = new platform(platforms[i]); + } + *ptr_platforms = _ptr_platforms; + return 0; + } + + void freem(void* p) { + free(p); + } + + void* platform__get_info(void* ptr_platform, cl_platform_info param_name, char** out) { + // todo: catch error + *out = _copy_str(static_cast<platform*>(ptr_platform)->get_info(param_name)); + return 0; + } + + void* platform__get_devices(void* ptr_platform, void** ptr_devices, uint32_t* num_devices, cl_device_type devtype) { + typedef std::vector<cl_device_id> vec; + + // todo: catch error + vec devices = static_cast<platform*>(ptr_platform)->get_devices(devtype); + *num_devices = devices.size(); + + MALLOC(device*, _ptr_devices, *num_devices); + for(vec::size_type i = 0; i < devices.size(); ++i) { + _ptr_devices[i] = new device(devices[i]); + } + *ptr_devices = _ptr_devices; + + return 0; + } + + void* device__get_info(void* ptr_device, cl_device_info param_name, char** out) { + // todo: catch error + *out = _copy_str(static_cast<device*>(ptr_device)->get_info(param_name)); + return 0; + } + + void* _create_context(void** ptr_ctx, cl_context_properties* properties, cl_uint num_devices, void** ptr_devices) { + + cl_int status_code; + std::vector<cl_device_id> devices(num_devices); + for(cl_uint i = 0; i < num_devices; ++i) { + devices[i] = static_cast<device*>(ptr_devices[i])->data(); + } + cl_context ctx = clCreateContext(properties, + num_devices, + devices.empty() ? NULL : &devices.front(), + 0, 0, &status_code); + if (status_code != CL_SUCCESS) { + std::cout << status_code << std::endl; + // TODO error handling + //throw pyopencl::error("Context", status_code); + } + *ptr_ctx = new context(ctx, false); + + return 0; + } + + void* _create_command_queue(void** ptr_command_queue, void* ptr_context, void* ptr_device, cl_command_queue_properties properties) { + // todo error handling + context* ctx = static_cast<context*>(ptr_context); + device* dev = static_cast<device*>(ptr_device); + *ptr_command_queue = new command_queue(*ctx, dev, properties); + return 0; + } + + void* _create_buffer(void** ptr_buffer, void* ptr_context, cl_mem_flags flags, size_t size, void* hostbuf) { + context* ctx = static_cast<context*>(ptr_context); + *ptr_buffer = create_buffer_py(*ctx, flags, size, hostbuf); + // todo error handling + return 0; + } + + void* _create_program_with_source(void **ptr_program, void *ptr_context, char* src) { + context* ctx = static_cast<context*>(ptr_context); + *ptr_program = create_program_with_source(*ctx, src); + // todo error handling + return 0; + } + + void* _create_program_with_binary(void **ptr_program, void *ptr_context, cl_uint num_devices, void** ptr_devices, cl_uint num_binaries, char** binaries) { + // todo: catch error + context* ctx = static_cast<context*>(ptr_context); + *ptr_program = create_program_with_binary(*ctx, num_devices, ptr_devices, num_binaries, binaries); + return 0; + } + + void* program__build(void* ptr_program, char* options, cl_uint num_devices, void** ptr_devices) { + // todo: catch error + static_cast<program*>(ptr_program)->build(options, num_devices, ptr_devices); + return 0; + } + + void* program__kind(void* ptr_program, int *kind) { + // todo: catch error + *kind = static_cast<program*>(ptr_program)->kind(); + return 0; + } + + void* program__get_info__devices(void* ptr_program, void** ptr_devices, uint32_t* num_devices) { + + typedef std::vector<cl_device_id> vec; + + // todo: refactor, same as get_devices() + + // todo: catch error + vec devices = static_cast<program*>(ptr_program)->get_info__devices(); + *num_devices = devices.size(); + + MALLOC(device*, _ptr_devices, *num_devices); + for(vec::size_type i = 0; i < devices.size(); ++i) { + _ptr_devices[i] = new device(devices[i]); + } + *ptr_devices = _ptr_devices; + + return 0; + + } + + void* program__get_info__binaries(void* ptr_program, char*** ptr_binaries, uint32_t* num_binaries) { + // todo catch error + *ptr_binaries = static_cast<program*>(ptr_program)->get_info__binaries(num_binaries); + return 0; + } + + long device__hash(void *ptr_device) { + return static_cast<device*>(ptr_device)->hash(); + } + + void* _create_kernel(void** ptr_kernel, void* ptr_program, char* name) { + program* prg = static_cast<program*>(ptr_program); + *ptr_kernel = new kernel(*prg, name); + // todo error handling + return 0; + } + + void* kernel__get_info(void* ptr_kernel, cl_kernel_info param, generic_info* out) { + *out = static_cast<kernel*>(ptr_kernel)->get_info(param); + // todo error handling + return 0; + } + + void* kernel__set_arg_mem_buffer(void* ptr_kernel, cl_uint arg_index, void* ptr_buffer) { + buffer* buf = static_cast<buffer*>(ptr_buffer); + static_cast<kernel*>(ptr_kernel)->set_arg_mem(arg_index, *buf); + // todo error handling + return 0; + } + + void* _enqueue_nd_range_kernel(void **ptr_event, void* ptr_command_queue, void* ptr_kernel, cl_uint work_dim, const size_t* global_work_offset, const size_t* global_work_size, const size_t* local_work_size) { + *ptr_event = enqueue_nd_range_kernel(*static_cast<command_queue*>(ptr_command_queue), + *static_cast<kernel*>(ptr_kernel), + work_dim, + global_work_offset, + global_work_size, + local_work_size); + // todo error handling + + return 0; + } + + + void* _enqueue_read_buffer(void **ptr_event, void* ptr_command_queue, void* ptr_memory_object_holder, void* buffer, size_t size, size_t device_offset, int is_blocking) { + *ptr_event = enqueue_read_buffer(*static_cast<command_queue*>(ptr_command_queue), + *static_cast<memory_object_holder*>(ptr_memory_object_holder), + buffer, size, device_offset, (bool)is_blocking); + // todo error handling + return 0; + } + + void* memory_object_holder__get_info(void* ptr_memory_object_holder, cl_mem_info param, generic_info* out) { + *out = static_cast<memory_object_holder*>(ptr_memory_object_holder)->get_info(param); + // todo error handling + return 0; + } + + +} + + + + diff --git a/src/c_wrapper/wrap_cl.h b/src/c_wrapper/wrap_cl.h new file mode 100644 index 00000000..3abd74cf --- /dev/null +++ b/src/c_wrapper/wrap_cl.h @@ -0,0 +1,73 @@ +#ifndef _WRAP_CL_H +#define _WRAP_CL_H + + +// CL 1.2 undecided: +// clSetPrintfCallback + +// {{{ includes + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#ifdef __APPLE__ + +// Mac ------------------------------------------------------------------------ +#include <OpenCL/opencl.h> +#ifdef HAVE_GL + +#define PYOPENCL_GL_SHARING_VERSION 1 + +#include <OpenGL/OpenGL.h> +#include <OpenCL/cl_gl.h> +#include <OpenCL/cl_gl_ext.h> +#endif + +#else + +// elsewhere ------------------------------------------------------------------ +#include <CL/cl.h> +#include <CL/cl_ext.h> + +#if defined(_WIN32) +#define NOMINMAX +#include <windows.h> +#endif + +#ifdef HAVE_GL +#include <GL/gl.h> +#include <CL/cl_gl.h> +#endif + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) +#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing +#endif + +#endif + + +#ifdef PYOPENCL_PRETEND_CL_VERSION +#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION +#else + +#if defined(CL_VERSION_1_2) +#define PYOPENCL_CL_VERSION 0x1020 +#elif defined(CL_VERSION_1_1) +#define PYOPENCL_CL_VERSION 0x1010 +#else +#define PYOPENCL_CL_VERSION 0x1000 +#endif + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + #include "wrap_cl_core.h" +#ifdef __cplusplus +} +#endif + +#endif + + diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h new file mode 100644 index 00000000..f25b2b4b --- /dev/null +++ b/src/c_wrapper/wrap_cl_core.h @@ -0,0 +1,34 @@ + typedef enum { + generic_info_type_cl_uint, + generic_info_type_cl_mem_object_type, + } generic_info_type_t; + typedef struct { + generic_info_type_t type; + union value_t { + cl_uint _cl_uint; + cl_mem_object_type _cl_mem_object_type; + } value; + } generic_info; + + int get_cl_version(void); + void* get_platforms(void** ptr_platforms, uint32_t* num_platforms); + void* platform__get_info(void* ptr_platform, cl_platform_info param_name, char** out); + void* platform__get_devices(void* ptr_platform, void** ptr_devices, uint32_t* num_devices, cl_device_type devtype); + void* device__get_info(void* ptr_device, cl_device_info param_name, char** out); + long device__hash(void *ptr_device); + void* _create_context(void** ptr_ctx, cl_context_properties* properties, cl_uint num_devices, void** ptr_devices); + void* _create_command_queue(void** ptr_command_queue, void* ptr_context, void* ptr_device, cl_command_queue_properties properties); + void* _create_buffer(void** ptr_buffer, void* ptr_context, cl_mem_flags flags, size_t size, void* hostbuf); + void* _create_program_with_source(void **ptr_program, void *ptr_context, char* src); + void* _create_program_with_binary(void **ptr_program, void *ptr_context, cl_uint num_devices, void** ptr_devices, cl_uint num_binaries, char** binaries); + void* program__build(void* ptr_program, char* options, cl_uint num_devices, void** ptr_devices); + void* program__kind(void* ptr_program, int *kind); + void* program__get_info__devices(void* ptr_program, void** ptr_devices, uint32_t* num_devices); + void* program__get_info__binaries(void* ptr_program, char*** ptr_binaries, uint32_t* num_binaries); + void* _create_kernel(void** ptr_kernel, void* ptr_program, char* name); + void* kernel__get_info(void *ptr_kernel, cl_kernel_info param, generic_info* out); + void* kernel__set_arg_mem_buffer(void* ptr_kernel, cl_uint arg_index, void* ptr_buffer); + void* _enqueue_nd_range_kernel(void **ptr_event, void* ptr_command_queue, void* ptr_kernel, cl_uint work_dim, const size_t* global_work_offset, const size_t* global_work_size, const size_t* local_work_size); + void* _enqueue_read_buffer(void **ptr_event, void* ptr_command_queue, void* ptr_memory_object_holder, void* buffer, size_t size, size_t device_offset, int is_blocking); + void* memory_object_holder__get_info(void* ptr_memory_object_holder, cl_mem_info param, generic_info* out); + void freem(void*); -- GitLab