From 107bf1c399bd5c03dac1df3e1fc21f6b26a5ffa1 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Thu, 4 Aug 2016 14:25:35 -0500 Subject: [PATCH] Strip away the monkeypatching layer --- pyopencl/__init__.py | 828 +------------------------------------------ pyopencl/cache.py | 2 +- pyopencl/cffi_cl.py | 808 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 770 insertions(+), 868 deletions(-) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 49ec01ef..9100c844 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -26,11 +26,9 @@ THE SOFTWARE. import re import six -import sys -from six.moves import input, range, intern +from six.moves import input from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa -from pytools import Record as _Record try: import pyopencl.cffi_cl as _cl @@ -43,9 +41,6 @@ except ImportError: "its source directory. This likely won't work.") raise -_CPY2 = _cl._CPY2 -_CPY26 = _cl._CPY2 and sys.version_info < (2, 7) - import numpy as np from pyopencl.cffi_cl import ( # noqa @@ -117,6 +112,7 @@ from pyopencl.cffi_cl import ( # noqa MemoryObject, MemoryMap, Buffer, + CompilerWarning, _Program, Kernel, @@ -166,6 +162,8 @@ from pyopencl.cffi_cl import ( # noqa Sampler, GLTexture, DeviceTopologyAmd, + + add_get_info_attrs as _add_get_info_attrs, ) if _cl.have_gl(): @@ -183,54 +181,6 @@ if _cl.have_gl(): pass -import inspect as _inspect - -CONSTANT_CLASSES = [ - getattr(_cl, name) for name in dir(_cl) - if _inspect.isclass(getattr(_cl, name)) - and name[0].islower() and name not in ["zip", "map", "range"]] - - -# {{{ diagnostics - -class CompilerWarning(UserWarning): - pass - - -def compiler_output(text): - import os - from warnings import warn - if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): - warn(text, CompilerWarning) - else: - warn("Non-empty compiler output encountered. Set the " - "environment variable PYOPENCL_COMPILER_OUTPUT=1 " - "to see more.", CompilerWarning) - - -class _ErrorRecord(_Record): - pass - -# }}} - - -# {{{ arg packing helpers - -_size_t_char = ({ - 8: 'Q', - 4: 'L', - 2: 'H', - 1: 'B', -})[_cl._ffi.sizeof('size_t')] -_type_char_map = { - 'n': _size_t_char.lower(), - 'N': _size_t_char -} -del _size_t_char - -# }}} - - # {{{ find pyopencl shipped source code def _find_pyopencl_include_path(): @@ -256,7 +206,7 @@ def _find_pyopencl_include_path(): # }}} -# {{{ Program (including caching support) +# {{{ Program (wrapper around _Program, adds caching support) _DEFAULT_BUILD_OPTIONS = [] _DEFAULT_INCLUDE_OPTIONS = ["-I", _find_pyopencl_include_path()] @@ -507,7 +457,7 @@ class Program(object): routine = e.routine err = _cl.RuntimeError( - _ErrorRecord( + _cl.Error._ErrorRecord( what=lambda: what, code=lambda: code, routine=lambda: routine)) @@ -532,6 +482,8 @@ class Program(object): def __hash__(self): return hash(self._get_prg()) +_add_get_info_attrs(Program, Program.get_info, program_info) + def create_program_with_built_in_kernels(context, devices, kernel_names): if not isinstance(kernel_names, str): @@ -548,764 +500,7 @@ def link_program(context, programs, options=[], devices=None): # }}} -def _add_functionality(): - cls_to_info_cls = { - _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []), - _cl.Device: (_cl.Device.get_info, _cl.device_info, - ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]), - _cl.Context: (_cl.Context.get_info, _cl.context_info, []), - _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info, - ["CONTEXT", "DEVICE"]), - _cl.Event: (_cl.Event.get_info, _cl.event_info, []), - _cl.MemoryObjectHolder: - (MemoryObjectHolder.get_info, _cl.mem_info, []), - Image: (_cl.Image.get_image_info, _cl.image_info, []), - Program: (Program.get_info, _cl.program_info, []), - Kernel: (Kernel.get_info, _cl.kernel_info, []), - _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []), - } - - def to_string(cls, value, default_format=None): - for name in dir(cls): - if (not name.startswith("_") and getattr(cls, name) == value): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - for cls in CONSTANT_CLASSES: - cls.to_string = classmethod(to_string) - - # {{{ get_info attributes ------------------------------------------------- - - def make_getinfo(info_method, info_name, info_attr): - def result(self): - return info_method(self, info_attr) - - return property(result) - - def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): - def result(self): - try: - return getattr(self, cache_attr) - except AttributeError: - pass - - result = info_method(self, info_attr) - setattr(self, cache_attr, result) - return result - - return property(result) - - for cls, (info_method, info_class, cacheable_attrs) \ - in six.iteritems(cls_to_info_cls): - for info_name, info_value in six.iteritems(info_class.__dict__): - if info_name == "to_string" or info_name.startswith("_"): - continue - - info_lower = info_name.lower() - info_constant = getattr(info_class, info_name) - if info_name in cacheable_attrs: - cache_attr = intern("_info_cache_"+info_lower) - setattr(cls, info_lower, make_cacheable_getinfo( - info_method, info_lower, cache_attr, info_constant)) - else: - setattr(cls, info_lower, make_getinfo( - info_method, info_name, info_constant)) - - # }}} - - # {{{ Platform - - def platform_repr(self): - return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) - - def platform_get_cl_version(self): - import re - version_string = self.version - match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) - if match is None: - raise RuntimeError("platform %s returned non-conformant " - "platform version string '%s'" % - (self, version_string)) - - return int(match.group(1)), int(match.group(2)) - - Platform.__repr__ = platform_repr - Platform._get_cl_version = platform_get_cl_version - - # }}} - - # {{{ Device - - def device_repr(self): - return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( - self.name.strip(), self.platform.name.strip(), self.int_ptr) - - def device_persistent_unique_id(self): - return (self.vendor, self.vendor_id, self.name, self.version) - - Device.__repr__ = device_repr - - # undocumented for now: - Device.persistent_unique_id = property(device_persistent_unique_id) - - # }}} - - # {{{ Context - - def context_repr(self): - return "<pyopencl.Context at 0x%x on %s>" % (self.int_ptr, - ", ".join(repr(dev) for dev in self.devices)) - - def context_get_cl_version(self): - return self.devices[0].platform._get_cl_version() - - Context.__repr__ = context_repr - from pytools import memoize_method - Context._get_cl_version = memoize_method(context_get_cl_version) - - # }}} - - # {{{ CommandQueue - - def command_queue_enter(self): - return self - - def command_queue_exit(self, exc_type, exc_val, exc_tb): - self.finish() - - def command_queue_get_cl_version(self): - return self.context._get_cl_version() - - CommandQueue.__enter__ = command_queue_enter - CommandQueue.__exit__ = command_queue_exit - CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version) - - # }}} - - # {{{ _Program (the internal, non-caching version) - - def program_get_build_logs(self): - build_logs = [] - for dev in self.get_info(_cl.program_info.DEVICES): - try: - log = self.get_build_info(dev, program_build_info.LOG) - except: - log = "<error retrieving log>" - - build_logs.append((dev, log)) - - return build_logs - - def program_build(self, options_bytes, devices=None): - err = None - try: - self._build(options=options_bytes, devices=devices) - except Error as e: - what = e.what + "\n\n" + (75*"="+"\n").join( - "Build on %s:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs()) - code = e.code - routine = e.routine - - err = _cl.RuntimeError( - _ErrorRecord( - what=lambda: what, - code=lambda: code, - routine=lambda: routine)) - - if err is not None: - # Python 3.2 outputs the whole list of currently active exceptions - # This serves to remove one (redundant) level from that nesting. - raise err - - message = (75*"="+"\n").join( - "Build on %s succeeded, but said:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs() - if log is not None and log.strip()) - - if message: - if self.kind() == program_kind.SOURCE: - build_type = "From-source build" - elif self.kind() == program_kind.BINARY: - build_type = "From-binary build" - else: - build_type = "Build" - - compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" - % (build_type, message)) - - return self - - _cl._Program._get_build_logs = program_get_build_logs - _cl._Program.build = program_build - - # }}} - - # {{{ Event - class ProfilingInfoGetter: - def __init__(self, event): - self.event = event - - def __getattr__(self, name): - info_cls = _cl.profiling_info - - try: - inf_attr = getattr(info_cls, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_profiling_info(inf_attr) - - _cl.Event.profile = property(ProfilingInfoGetter) - - # }}} - - # {{{ Kernel - - kernel_old_init = Kernel.__init__ - kernel_old_get_work_group_info = Kernel.get_work_group_info - - def kernel_init(self, prg, name): - if not isinstance(prg, _cl._Program): - prg = prg._get_prg() - - kernel_old_init(self, prg, name) - - self._setup(prg) - - def kernel__setup(self, prg): - self._source = getattr(prg, "_source", None) - - self._generate_naive_call() - self._wg_info_cache = {} - return self - - def kernel_get_work_group_info(self, param, device): - try: - return self._wg_info_cache[param, device] - except KeyError: - pass - - result = kernel_old_get_work_group_info(self, param, device) - self._wg_info_cache[param, device] = result - return result - - # {{{ code generation for __call__, set_args - - def kernel__set_set_args_body(self, body, num_passed_args): - from pytools.py_codegen import ( - PythonFunctionGenerator, - PythonCodeGenerator, - Indentation) - - arg_names = ["arg%d" % i for i in range(num_passed_args)] - - # {{{ wrap in error handler - - err_gen = PythonCodeGenerator() - - def gen_error_handler(): - err_gen(""" - if current_arg is not None: - args = [{args}] - advice = "" - from pyopencl.array import Array - if isinstance(args[current_arg], Array): - advice = " (perhaps you meant to pass 'array.data' " \ - "instead of the array itself?)" - - raise _cl.LogicError( - "when processing argument #%d (1-based): %s%s" - % (current_arg+1, str(e), advice)) - else: - raise - """ - .format(args=", ".join(arg_names))) - err_gen("") - - err_gen("try:") - with Indentation(err_gen): - err_gen.extend(body) - err_gen("except TypeError as e:") - with Indentation(err_gen): - gen_error_handler() - err_gen("except _cl.LogicError as e:") - with Indentation(err_gen): - gen_error_handler() - - # }}} - - def add_preamble(gen): - gen.add_to_preamble( - "import numpy as np") - gen.add_to_preamble( - "import pyopencl.cffi_cl as _cl") - gen.add_to_preamble( - "from pyopencl.cffi_cl import _lib, " - "_ffi, _handle_error, _CLKernelArg") - gen.add_to_preamble("from pyopencl import status_code") - gen.add_to_preamble("from struct import pack") - gen.add_to_preamble("") - - # {{{ generate _enqueue - - gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name, - ["self", "queue", "global_size", "local_size"] - + arg_names - + ["global_offset=None", "g_times_l=None", "wait_for=None"]) - - add_preamble(gen) - gen.extend(err_gen) - - gen(""" - return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size, - global_offset, wait_for, g_times_l=g_times_l) - """) - - self._enqueue = gen.get_function() - - # }}} - - # {{{ generate set_args - - gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names) - - add_preamble(gen) - gen.extend(err_gen) - - self._set_args = gen.get_function() - - # }}} - - def kernel__generate_buffer_arg_setter(self, gen, arg_idx, buf_var): - from pytools.py_codegen import Indentation - - if _CPY2: - # https://github.com/numpy/numpy/issues/5381 - gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) - with Indentation(gen): - gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) - - gen(""" - c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var}) - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz) - if status != _ffi.NULL: - _handle_error(status) - """ - .format(arg_idx=arg_idx, buf_var=buf_var)) - - def kernel__generate_bytes_arg_setter(self, gen, arg_idx, buf_var): - gen(""" - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, - {buf_var}, len({buf_var})) - if status != _ffi.NULL: - _handle_error(status) - """ - .format(arg_idx=arg_idx, buf_var=buf_var)) - - def kernel__generate_generic_arg_handler(self, gen, arg_idx, arg_var): - from pytools.py_codegen import Indentation - - gen(""" - if {arg_var} is None: - status = _lib.kernel__set_arg_null(self.ptr, {arg_idx}) - if status != _ffi.NULL: - _handle_error(status) - elif isinstance({arg_var}, _CLKernelArg): - self.set_arg({arg_idx}, {arg_var}) - """ - .format(arg_idx=arg_idx, arg_var=arg_var)) - - gen("else:") - with Indentation(gen): - self._generate_buffer_arg_setter(gen, arg_idx, arg_var) - - def kernel__generate_naive_call(self): - num_args = self.num_args - - from pytools.py_codegen import PythonCodeGenerator - gen = PythonCodeGenerator() - - if num_args == 0: - gen("pass") - - for i in range(num_args): - gen("# process argument {arg_idx}".format(arg_idx=i)) - gen("") - gen("current_arg = {arg_idx}".format(arg_idx=i)) - self._generate_generic_arg_handler(gen, i, "arg%d" % i) - gen("") - - self._set_set_args_body(gen, num_args) - - def kernel_set_scalar_arg_dtypes(self, scalar_arg_dtypes): - self._scalar_arg_dtypes = scalar_arg_dtypes - - # {{{ arg counting bug handling - - # For example: - # https://github.com/pocl/pocl/issues/197 - # (but Apple CPU has a similar bug) - - work_around_arg_count_bug = False - warn_about_arg_count_bug = False - - from pyopencl.characterize import has_struct_arg_count_bug - - count_bug_per_dev = [ - has_struct_arg_count_bug(dev) - for dev in self.context.devices] - - from pytools import single_valued - if any(count_bug_per_dev): - if all(count_bug_per_dev): - work_around_arg_count_bug = single_valued(count_bug_per_dev) - else: - warn_about_arg_count_bug = True - - fp_arg_count = 0 - - # }}} - - cl_arg_idx = 0 - - from pytools.py_codegen import PythonCodeGenerator - gen = PythonCodeGenerator() - - if not scalar_arg_dtypes: - gen("pass") - - for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes): - gen("# process argument {arg_idx}".format(arg_idx=arg_idx)) - gen("") - gen("current_arg = {arg_idx}".format(arg_idx=arg_idx)) - arg_var = "arg%d" % arg_idx - - if arg_dtype is None: - self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) - cl_arg_idx += 1 - gen("") - continue - - arg_dtype = np.dtype(arg_dtype) - - if arg_dtype.char == "V": - self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) - cl_arg_idx += 1 - - elif arg_dtype.kind == "c": - if warn_about_arg_count_bug: - warn("{knl_name}: arguments include complex numbers, and " - "some (but not all) of the target devices mishandle " - "struct kernel arguments (hence the workaround is " - "disabled".format( - knl_name=self.function_name, stacklevel=2)) - - if arg_dtype == np.complex64: - arg_char = "f" - elif arg_dtype == np.complex128: - arg_char = "d" - else: - raise TypeError("unexpected complex type: %s" % arg_dtype) - - if (work_around_arg_count_bug == "pocl" - and arg_dtype == np.complex128 - and fp_arg_count + 2 <= 8): - gen( - "buf = pack('{arg_char}', {arg_var}.real)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - gen("current_arg = current_arg + 1000") - gen( - "buf = pack('{arg_char}', {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - elif (work_around_arg_count_bug == "apple" - and arg_dtype == np.complex128 - and fp_arg_count + 2 <= 8): - raise NotImplementedError("No work-around to " - "Apple's broken structs-as-kernel arg " - "handling has been found. " - "Cannot pass complex numbers to kernels.") - - else: - gen( - "buf = pack('{arg_char}{arg_char}', " - "{arg_var}.real, {arg_var}.imag)" - .format(arg_char=arg_char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - fp_arg_count += 2 - - elif arg_dtype.char in "IL" and _CPY26: - # Prevent SystemError: ../Objects/longobject.c:336: bad - # argument to internal function - - gen( - "buf = pack('{arg_char}', long({arg_var}))" - .format(arg_char=arg_dtype.char, arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - else: - if arg_dtype.kind == "f": - fp_arg_count += 1 - - arg_char = arg_dtype.char - arg_char = _type_char_map.get(arg_char, arg_char) - gen( - "buf = pack('{arg_char}', {arg_var})" - .format( - arg_char=arg_char, - arg_var=arg_var)) - self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") - cl_arg_idx += 1 - - gen("") - - if cl_arg_idx != self.num_args: - raise TypeError( - "length of argument list (%d) and " - "CL-generated number of arguments (%d) do not agree" - % (cl_arg_idx, self.num_args)) - - self._set_set_args_body(gen, len(scalar_arg_dtypes)) - - # }}} - - def kernel_set_args(self, *args, **kwargs): - # Need to dupicate the 'self' argument for dynamically generated method - return self._set_args(self, *args, **kwargs) - - def kernel_call(self, queue, global_size, local_size, *args, **kwargs): - # __call__ can't be overridden directly, so we need this - # trampoline hack. - return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) - - def kernel_capture_call(self, filename, queue, global_size, local_size, - *args, **kwargs): - from pyopencl.capture_call import capture_kernel_call - capture_kernel_call(self, filename, queue, global_size, local_size, - *args, **kwargs) - - Kernel.__init__ = kernel_init - Kernel._setup = kernel__setup - Kernel.get_work_group_info = kernel_get_work_group_info - Kernel._set_set_args_body = kernel__set_set_args_body - Kernel._generate_buffer_arg_setter = kernel__generate_buffer_arg_setter - Kernel._generate_bytes_arg_setter = kernel__generate_bytes_arg_setter - Kernel._generate_generic_arg_handler = kernel__generate_generic_arg_handler - Kernel._generate_naive_call = kernel__generate_naive_call - Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes - Kernel.set_args = kernel_set_args - Kernel.__call__ = kernel_call - Kernel.capture_call = kernel_capture_call - - # }}} - - # {{{ ImageFormat - - def image_format_repr(self): - return "ImageFormat(%s, %s)" % ( - channel_order.to_string(self.channel_order, - "<unknown channel order 0x%x>"), - channel_type.to_string(self.channel_data_type, - "<unknown channel data type 0x%x>")) - - def image_format_eq(self, other): - return (self.channel_order == other.channel_order - and self.channel_data_type == other.channel_data_type) - - def image_format_ne(self, other): - return not image_format_eq(self, other) - - def image_format_hash(self): - return hash((type(self), self.channel_order, self.channel_data_type)) - - ImageFormat.__repr__ = image_format_repr - ImageFormat.__eq__ = image_format_eq - ImageFormat.__ne__ = image_format_ne - ImageFormat.__hash__ = image_format_hash - - # }}} - - # {{{ Image - - image_old_init = Image.__init__ - - def image_init(self, context, flags, format, shape=None, pitches=None, - hostbuf=None, is_array=False, buffer=None): - - if shape is None and hostbuf is None: - raise Error("'shape' must be passed if 'hostbuf' is not given") - - if shape is None and hostbuf is not None: - shape = hostbuf.shape - - if hostbuf is not None and not \ - (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn - warn("'hostbuf' was passed, but no memory flags to make use of it.") - - if hostbuf is None and pitches is not None: - raise Error("'pitches' may only be given if 'hostbuf' is given") - - if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): - if buffer is not None and is_array: - raise ValueError( - "'buffer' and 'is_array' are mutually exclusive") - - if len(shape) == 3: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 3D arrays") - elif is_array: - image_type = mem_object_type.IMAGE2D_ARRAY - else: - image_type = mem_object_type.IMAGE3D - - elif len(shape) == 2: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 2D arrays") - elif is_array: - image_type = mem_object_type.IMAGE1D_ARRAY - else: - image_type = mem_object_type.IMAGE2D - - elif len(shape) == 1: - if buffer is not None: - image_type = mem_object_type.IMAGE1D_BUFFER - elif is_array: - raise TypeError("array of zero-dimensional images not supported") - else: - image_type = mem_object_type.IMAGE1D - - else: - raise ValueError("images cannot have more than three dimensions") - - desc = ImageDescriptor() - - desc.image_type = image_type - desc.shape = shape # also sets desc.array_size - - if pitches is None: - desc.pitches = (0, 0) - else: - desc.pitches = pitches - - desc.num_mip_levels = 0 # per CL 1.2 spec - desc.num_samples = 0 # per CL 1.2 spec - desc.buffer = buffer - - image_old_init(self, context, flags, format, desc, hostbuf) - else: - # legacy init for CL 1.1 and older - if is_array: - raise TypeError("'is_array=True' is not supported for CL < 1.2") - # if num_mip_levels is not None: - # raise TypeError( - # "'num_mip_levels' argument is not supported for CL < 1.2") - # if num_samples is not None: - # raise TypeError( - # "'num_samples' argument is not supported for CL < 1.2") - if buffer is not None: - raise TypeError("'buffer' argument is not supported for CL < 1.2") - - image_old_init(self, context, flags, format, shape, - pitches, hostbuf) - - class _ImageInfoGetter: - def __init__(self, event): - from warnings import warn - warn("Image.image.attr is deprecated. " - "Use Image.attr directly, instead.") - - self.event = event - - def __getattr__(self, name): - try: - inf_attr = getattr(_cl.image_info, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_image_info(inf_attr) - - def image_shape(self): - if self.type == mem_object_type.IMAGE2D: - return (self.width, self.height) - elif self.type == mem_object_type.IMAGE3D: - return (self.width, self.height, self.depth) - else: - raise LogicError("only images have shapes") - - Image.__init__ = image_init - Image.image = property(_ImageInfoGetter) - Image.shape = property(image_shape) - - # }}} - - # {{{ Error - - def error_str(self): - val = self.args[0] - try: - val.routine - except AttributeError: - return str(val) - else: - result = "" - if val.code() != status_code.SUCCESS: - result = status_code.to_string( - val.code(), "<unknown error %d>") - routine = val.routine() - if routine: - result = "%s failed: %s" % (routine, result) - what = val.what() - if what: - if result: - result += " - " - result += what - return result - - def error_code(self): - return self.args[0].code() - - def error_routine(self): - return self.args[0].routine() - - def error_what(self): - return self.args[0].what() - - Error.__str__ = error_str - Error.code = property(error_code) - Error.routine = property(error_routine) - Error.what = property(error_what) - - # }}} - - if _cl.have_gl(): - def gl_object_get_gl_object(self): - return self.get_gl_object_info()[1] - - GLBuffer.gl_object = property(gl_object_get_gl_object) - GLTexture.gl_object = property(gl_object_get_gl_object) - -_add_functionality() - - -# {{{ convenience +# {{{ create_some_context def create_some_context(interactive=None, answers=None, cache_dir=None): import os @@ -1437,6 +632,10 @@ def create_some_context(interactive=None, answers=None, cache_dir=None): _csc = create_some_context +# }}} + + +# {{{ enqueue_copy def _mark_copy_deprecated(func): def new_func(*args, **kwargs): @@ -1670,6 +869,7 @@ def enqueue_copy(queue, dest, src, **kwargs): # }}} + # {{{ image creation DTYPE_TO_CHANNEL_TYPE = { diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 5ac23a49..bbe1314f 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -368,7 +368,7 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes, if log is not None and log.strip()) if message: - from pyopencl import compiler_output + from pyopencl.cffi_cl import compiler_output compiler_output( "Built kernel retrieved from cache. Original from-source " "build had warnings:\n"+message) diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index 20e68401..94a3a1e2 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -27,12 +27,15 @@ THE SOFTWARE. """ import six -from six.moves import map, range, zip +from six.moves import map, range, zip, intern import warnings +from warnings import warn import numpy as np import sys +from pytools import memoize_method + from pyopencl._cffi import ffi as _ffi from .compyte.array import f_contiguous_strides, c_contiguous_strides @@ -86,6 +89,7 @@ _lib.set_py_funcs(_py_gc, _py_ref, _py_deref, _py_call) # are we running on pypy? _PYPY = '__pypy__' in sys.builtin_module_names _CPY2 = not _PYPY and sys.version_info < (3,) +_CPY26 = _CPY2 and sys.version_info < (2, 7) try: _unicode = eval('unicode') @@ -267,80 +271,92 @@ def get_cl_header_version(): _constants = {} -class _NoInit(object): +# {{{ constant classes + +class _ConstantsNamespace(object): def __init__(self): raise RuntimeError("This class cannot be instantiated.") + @classmethod + def to_string(cls, value, default_format=None): + for name in dir(cls): + if (not name.startswith("_") and getattr(cls, name) == value): + return name + + if default_format is None: + raise ValueError("a name for value %d was not found in %s" + % (value, cls.__name__)) + else: + return default_format % value -# {{{ constant classes # /!\ If you add anything here, add it to pyopencl/__init__.py as well. -class program_kind(_NoInit): # noqa +class program_kind(_ConstantsNamespace): # noqa pass -class status_code(_NoInit): # noqa +class status_code(_ConstantsNamespace): # noqa pass -class platform_info(_NoInit): # noqa +class platform_info(_ConstantsNamespace): # noqa pass -class device_type(_NoInit): # noqa +class device_type(_ConstantsNamespace): # noqa pass -class device_info(_NoInit): # noqa +class device_info(_ConstantsNamespace): # noqa pass -class device_fp_config(_NoInit): # noqa +class device_fp_config(_ConstantsNamespace): # noqa pass -class device_mem_cache_type(_NoInit): # noqa +class device_mem_cache_type(_ConstantsNamespace): # noqa pass -class device_local_mem_type(_NoInit): # noqa +class device_local_mem_type(_ConstantsNamespace): # noqa pass -class device_exec_capabilities(_NoInit): # noqa +class device_exec_capabilities(_ConstantsNamespace): # noqa pass -class device_svm_capabilities(_NoInit): # noqa +class device_svm_capabilities(_ConstantsNamespace): # noqa pass -class command_queue_properties(_NoInit): # noqa +class command_queue_properties(_ConstantsNamespace): # noqa pass -class context_info(_NoInit): # noqa +class context_info(_ConstantsNamespace): # noqa pass -class gl_context_info(_NoInit): # noqa +class gl_context_info(_ConstantsNamespace): # noqa pass -class context_properties(_NoInit): # noqa +class context_properties(_ConstantsNamespace): # noqa pass -class command_queue_info(_NoInit): # noqa +class command_queue_info(_ConstantsNamespace): # noqa pass -class queue_properties(_NoInit): # noqa +class queue_properties(_ConstantsNamespace): # noqa pass -class mem_flags(_NoInit): # noqa +class mem_flags(_ConstantsNamespace): # noqa @classmethod def _writable(cls, flags): return flags & (cls.READ_WRITE | cls.WRITE_ONLY) @@ -358,126 +374,125 @@ class mem_flags(_NoInit): # noqa return cls._writable(flags) and cls._hold_host(flags) -class svm_mem_flags(_NoInit): # noqa +class svm_mem_flags(_ConstantsNamespace): # noqa pass -class channel_order(_NoInit): # noqa +class channel_order(_ConstantsNamespace): # noqa pass -class channel_type(_NoInit): # noqa +class channel_type(_ConstantsNamespace): # noqa pass -class mem_object_type(_NoInit): # noqa +class mem_object_type(_ConstantsNamespace): # noqa pass -class mem_info(_NoInit): # noqa +class mem_info(_ConstantsNamespace): # noqa pass -class image_info(_NoInit): # noqa +class image_info(_ConstantsNamespace): # noqa pass -class addressing_mode(_NoInit): # noqa +class addressing_mode(_ConstantsNamespace): # noqa pass -class filter_mode(_NoInit): # noqa +class filter_mode(_ConstantsNamespace): # noqa pass -class sampler_info(_NoInit): # noqa +class sampler_info(_ConstantsNamespace): # noqa pass -class map_flags(_NoInit): # noqa +class map_flags(_ConstantsNamespace): # noqa pass -class program_info(_NoInit): # noqa +class program_info(_ConstantsNamespace): # noqa pass -class program_build_info(_NoInit): # noqa +class program_build_info(_ConstantsNamespace): # noqa pass -class program_binary_type(_NoInit): # noqa +class program_binary_type(_ConstantsNamespace): # noqa pass -class kernel_info(_NoInit): # noqa +class kernel_info(_ConstantsNamespace): # noqa pass -class kernel_arg_info(_NoInit): # noqa +class kernel_arg_info(_ConstantsNamespace): # noqa pass -class kernel_arg_address_qualifier(_NoInit): # noqa +class kernel_arg_address_qualifier(_ConstantsNamespace): # noqa pass -class kernel_arg_access_qualifier(_NoInit): # noqa +class kernel_arg_access_qualifier(_ConstantsNamespace): # noqa pass -class kernel_arg_type_qualifier(_NoInit): # noqa +class kernel_arg_type_qualifier(_ConstantsNamespace): # noqa pass -class kernel_work_group_info(_NoInit): # noqa +class kernel_work_group_info(_ConstantsNamespace): # noqa pass -class event_info(_NoInit): # noqa +class event_info(_ConstantsNamespace): # noqa pass -class command_type(_NoInit): # noqa +class command_type(_ConstantsNamespace): # noqa pass -class command_execution_status(_NoInit): # noqa +class command_execution_status(_ConstantsNamespace): # noqa pass -class profiling_info(_NoInit): # noqa +class profiling_info(_ConstantsNamespace): # noqa pass -class mem_migration_flags(_NoInit): # noqa +class mem_migration_flags(_ConstantsNamespace): # noqa pass -class mem_migration_flags_ext(_NoInit): # noqa +class mem_migration_flags_ext(_ConstantsNamespace): # noqa pass -class device_partition_property(_NoInit): # noqa +class device_partition_property(_ConstantsNamespace): # noqa pass -class device_affinity_domain(_NoInit): # noqa +class device_affinity_domain(_ConstantsNamespace): # noqa pass -class gl_object_type(_NoInit): # noqa +class gl_object_type(_ConstantsNamespace): # noqa pass -class gl_texture_info(_NoInit): # noqa +class gl_texture_info(_ConstantsNamespace): # noqa pass -class migrate_mem_object_flags_ext(_NoInit): # noqa +class migrate_mem_object_flags_ext(_ConstantsNamespace): # noqa pass - # }}} _locals = locals() @@ -524,6 +539,39 @@ class Error(Exception): else: super(Error, self).__init__(self._ErrorRecord(*a, **kw)) + def __str__(self): + val = self.args[0] + try: + val.routine + except AttributeError: + return str(val) + else: + result = "" + if val.code() != status_code.SUCCESS: + result = status_code.to_string( + val.code(), "<unknown error %d>") + routine = val.routine() + if routine: + result = "%s failed: %s" % (routine, result) + what = val.what() + if what: + if result: + result += " - " + result += what + return result + + @property + def code(self): + return self.args[0].code() + + @property + def routine(self): + return self.args[0].routine() + + @property + def what(self): + return self.args[0].what() + class MemoryError(Error): pass @@ -578,6 +626,20 @@ class Platform(_Common): return [Device._create(devices.ptr[0][i]) for i in range(devices.size[0])] + def __repr__(self): + return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) + + def _get_cl_version(self): + import re + version_string = self.version + match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) + if match is None: + raise RuntimeError("platform %s returned non-conformant " + "platform version string '%s'" % + (self, version_string)) + + return int(match.group(1)), int(match.group(2)) + def unload_platform_compiler(plat): _handle_error(_lib.platform__unload_compiler(plat.ptr)) @@ -605,6 +667,14 @@ class Device(_Common): return [Device._create(devices.ptr[0][i]) for i in range(devices.size[0])] + def __repr__(self): + return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( + self.name.strip(), self.platform.name.strip(), self.int_ptr) + + @property + def persistent_unique_id(self): + return (self.vendor, self.vendor_id, self.name, self.version) + # }}} @@ -687,6 +757,14 @@ class Context(_Common): self.ptr = _ctx[0] self.cache_dir = cache_dir + def __repr__(self): + return "<pyopencl.Context at 0x%x on %s>" % (self.int_ptr, + ", ".join(repr(dev) for dev in self.devices)) + + @memoize_method + def _get_cl_version(self): + return self.devices[0].platform._get_cl_version() + # }}} @@ -713,6 +791,16 @@ class CommandQueue(_Common): def flush(self): _handle_error(_lib.command_queue__flush(self.ptr)) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.finish() + + def _get_cl_version(self): + return self.context._get_cl_version() + + # }}} @@ -988,6 +1076,21 @@ class Buffer(MemoryObject): # {{{ Program +class CompilerWarning(UserWarning): + pass + + +def compiler_output(text): + import os + from warnings import warn + if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): + warn(text, CompilerWarning) + else: + warn("Non-empty compiler output encountered. Set the " + "environment variable PYOPENCL_COMPILER_OUTPUT=1 " + "to see more.", CompilerWarning) + + class _Program(_Common): _id = 'program' @@ -1088,20 +1191,403 @@ class _Program(_Common): ._setup(self) for i in range(knls.size[0])] + def _get_build_logs(self): + build_logs = [] + for dev in self.get_info(program_info.DEVICES): + try: + log = self.get_build_info(dev, program_build_info.LOG) + except: + log = "<error retrieving log>" + + build_logs.append((dev, log)) + + return build_logs + + def build(self, options_bytes, devices=None): + err = None + try: + self._build(options=options_bytes, devices=devices) + except Error as e: + what = e.what + "\n\n" + (75*"="+"\n").join( + "Build on %s:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs()) + code = e.code + routine = e.routine + + err = RuntimeError( + Error._ErrorRecord( + what=lambda: what, + code=lambda: code, + routine=lambda: routine)) + + if err is not None: + # Python 3.2 outputs the whole list of currently active exceptions + # This serves to remove one (redundant) level from that nesting. + raise err + + message = (75*"="+"\n").join( + "Build on %s succeeded, but said:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs() + if log is not None and log.strip()) + + if message: + if self.kind() == program_kind.SOURCE: + build_type = "From-source build" + elif self.kind() == program_kind.BINARY: + build_type = "From-binary build" + else: + build_type = "Build" + + compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" + % (build_type, message)) + + return self + # }}} # {{{ Kernel +# {{{ arg packing helpers + +_size_t_char = ({ + 8: 'Q', + 4: 'L', + 2: 'H', + 1: 'B', +})[_ffi.sizeof('size_t')] +_type_char_map = { + 'n': _size_t_char.lower(), + 'N': _size_t_char +} +del _size_t_char + +# }}} + + class Kernel(_Common): _id = 'kernel' def __init__(self, program, name): + if not isinstance(program, _Program): + program = program._get_prg() + ptr_kernel = _ffi.new('clobj_t*') _handle_error(_lib.create_kernel(ptr_kernel, program.ptr, _to_cstring(name))) self.ptr = ptr_kernel[0] + self._setup(program) + + def _setup(self, prg): + self._source = getattr(prg, "_source", None) + + self._generate_naive_call() + self._wg_info_cache = {} + return self + + # {{{ code generation for __call__, set_args + + def _set_set_args_body(self, body, num_passed_args): + from pytools.py_codegen import ( + PythonFunctionGenerator, + PythonCodeGenerator, + Indentation) + + arg_names = ["arg%d" % i for i in range(num_passed_args)] + + # {{{ wrap in error handler + + err_gen = PythonCodeGenerator() + + def gen_error_handler(): + err_gen(""" + if current_arg is not None: + args = [{args}] + advice = "" + from pyopencl.array import Array + if isinstance(args[current_arg], Array): + advice = " (perhaps you meant to pass 'array.data' " \ + "instead of the array itself?)" + + raise _cl.LogicError( + "when processing argument #%d (1-based): %s%s" + % (current_arg+1, str(e), advice)) + else: + raise + """ + .format(args=", ".join(arg_names))) + err_gen("") + + err_gen("try:") + with Indentation(err_gen): + err_gen.extend(body) + err_gen("except TypeError as e:") + with Indentation(err_gen): + gen_error_handler() + err_gen("except _cl.LogicError as e:") + with Indentation(err_gen): + gen_error_handler() + + # }}} + + def add_preamble(gen): + gen.add_to_preamble( + "import numpy as np") + gen.add_to_preamble( + "import pyopencl.cffi_cl as _cl") + gen.add_to_preamble( + "from pyopencl.cffi_cl import _lib, " + "_ffi, _handle_error, _CLKernelArg") + gen.add_to_preamble("from pyopencl import status_code") + gen.add_to_preamble("from struct import pack") + gen.add_to_preamble("") + + # {{{ generate _enqueue + + gen = PythonFunctionGenerator("enqueue_knl_%s" % self.function_name, + ["self", "queue", "global_size", "local_size"] + + arg_names + + ["global_offset=None", "g_times_l=None", "wait_for=None"]) + + add_preamble(gen) + gen.extend(err_gen) + + gen(""" + return _cl.enqueue_nd_range_kernel(queue, self, global_size, local_size, + global_offset, wait_for, g_times_l=g_times_l) + """) + + self._enqueue = gen.get_function() + + # }}} + + # {{{ generate set_args + + gen = PythonFunctionGenerator("_set_args", ["self"] + arg_names) + + add_preamble(gen) + gen.extend(err_gen) + + self._set_args = gen.get_function() + + # }}} + + def _generate_buffer_arg_setter(self, gen, arg_idx, buf_var): + from pytools.py_codegen import Indentation + + if _CPY2: + # https://github.com/numpy/numpy/issues/5381 + gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) + with Indentation(gen): + gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) + + gen(""" + c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var}) + status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz) + if status != _ffi.NULL: + _handle_error(status) + """ + .format(arg_idx=arg_idx, buf_var=buf_var)) + + def _generate_bytes_arg_setter(self, gen, arg_idx, buf_var): + gen(""" + status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, + {buf_var}, len({buf_var})) + if status != _ffi.NULL: + _handle_error(status) + """ + .format(arg_idx=arg_idx, buf_var=buf_var)) + + def _generate_generic_arg_handler(self, gen, arg_idx, arg_var): + from pytools.py_codegen import Indentation + + gen(""" + if {arg_var} is None: + status = _lib.kernel__set_arg_null(self.ptr, {arg_idx}) + if status != _ffi.NULL: + _handle_error(status) + elif isinstance({arg_var}, _CLKernelArg): + self.set_arg({arg_idx}, {arg_var}) + """ + .format(arg_idx=arg_idx, arg_var=arg_var)) + + gen("else:") + with Indentation(gen): + self._generate_buffer_arg_setter(gen, arg_idx, arg_var) + + def _generate_naive_call(self): + num_args = self.num_args + + from pytools.py_codegen import PythonCodeGenerator + gen = PythonCodeGenerator() + + if num_args == 0: + gen("pass") + + for i in range(num_args): + gen("# process argument {arg_idx}".format(arg_idx=i)) + gen("") + gen("current_arg = {arg_idx}".format(arg_idx=i)) + self._generate_generic_arg_handler(gen, i, "arg%d" % i) + gen("") + + self._set_set_args_body(gen, num_args) + + def set_scalar_arg_dtypes(self, scalar_arg_dtypes): + self._scalar_arg_dtypes = scalar_arg_dtypes + + # {{{ arg counting bug handling + + # For example: + # https://github.com/pocl/pocl/issues/197 + # (but Apple CPU has a similar bug) + + work_around_arg_count_bug = False + warn_about_arg_count_bug = False + + from pyopencl.characterize import has_struct_arg_count_bug + + count_bug_per_dev = [ + has_struct_arg_count_bug(dev) + for dev in self.context.devices] + + from pytools import single_valued + if any(count_bug_per_dev): + if all(count_bug_per_dev): + work_around_arg_count_bug = single_valued(count_bug_per_dev) + else: + warn_about_arg_count_bug = True + + fp_arg_count = 0 + + # }}} + + cl_arg_idx = 0 + + from pytools.py_codegen import PythonCodeGenerator + gen = PythonCodeGenerator() + + if not scalar_arg_dtypes: + gen("pass") + + for arg_idx, arg_dtype in enumerate(scalar_arg_dtypes): + gen("# process argument {arg_idx}".format(arg_idx=arg_idx)) + gen("") + gen("current_arg = {arg_idx}".format(arg_idx=arg_idx)) + arg_var = "arg%d" % arg_idx + + if arg_dtype is None: + self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) + cl_arg_idx += 1 + gen("") + continue + + arg_dtype = np.dtype(arg_dtype) + + if arg_dtype.char == "V": + self._generate_generic_arg_handler(gen, cl_arg_idx, arg_var) + cl_arg_idx += 1 + + elif arg_dtype.kind == "c": + if warn_about_arg_count_bug: + warn("{knl_name}: arguments include complex numbers, and " + "some (but not all) of the target devices mishandle " + "struct kernel arguments (hence the workaround is " + "disabled".format( + knl_name=self.function_name, stacklevel=2)) + + if arg_dtype == np.complex64: + arg_char = "f" + elif arg_dtype == np.complex128: + arg_char = "d" + else: + raise TypeError("unexpected complex type: %s" % arg_dtype) + + if (work_around_arg_count_bug == "pocl" + and arg_dtype == np.complex128 + and fp_arg_count + 2 <= 8): + gen( + "buf = pack('{arg_char}', {arg_var}.real)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + gen("current_arg = current_arg + 1000") + gen( + "buf = pack('{arg_char}', {arg_var}.imag)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + elif (work_around_arg_count_bug == "apple" + and arg_dtype == np.complex128 + and fp_arg_count + 2 <= 8): + raise NotImplementedError("No work-around to " + "Apple's broken structs-as-kernel arg " + "handling has been found. " + "Cannot pass complex numbers to kernels.") + + else: + gen( + "buf = pack('{arg_char}{arg_char}', " + "{arg_var}.real, {arg_var}.imag)" + .format(arg_char=arg_char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + fp_arg_count += 2 + + elif arg_dtype.char in "IL" and _CPY26: + # Prevent SystemError: ../Objects/longobject.c:336: bad + # argument to internal function + + gen( + "buf = pack('{arg_char}', long({arg_var}))" + .format(arg_char=arg_dtype.char, arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + else: + if arg_dtype.kind == "f": + fp_arg_count += 1 + + arg_char = arg_dtype.char + arg_char = _type_char_map.get(arg_char, arg_char) + gen( + "buf = pack('{arg_char}', {arg_var})" + .format( + arg_char=arg_char, + arg_var=arg_var)) + self._generate_bytes_arg_setter(gen, cl_arg_idx, "buf") + cl_arg_idx += 1 + + gen("") + + if cl_arg_idx != self.num_args: + raise TypeError( + "length of argument list (%d) and " + "CL-generated number of arguments (%d) do not agree" + % (cl_arg_idx, self.num_args)) + + self._set_set_args_body(gen, len(scalar_arg_dtypes)) + + # }}} + + def set_args(self, *args, **kwargs): + # Need to duplicate the 'self' argument for dynamically generated method + return self._set_args(self, *args, **kwargs) + + def __call__(self, queue, global_size, local_size, *args, **kwargs): + # __call__ can't be overridden directly, so we need this + # trampoline hack. + return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) + + def capture_call(self, filename, queue, global_size, local_size, + *args, **kwargs): + from pyopencl.capture_call import capture_kernel_call + capture_kernel_call(self, filename, queue, global_size, local_size, + *args, **kwargs) + def set_arg(self, arg_index, arg): # If you change this, also change the kernel call generation logic. if arg is None: @@ -1126,10 +1612,18 @@ class Kernel(_Common): c_buf, size)) def get_work_group_info(self, param, device): + try: + return self._wg_info_cache[param, device] + except KeyError: + pass + info = _ffi.new('generic_info*') _handle_error(_lib.kernel__get_work_group_info( self.ptr, param, device.ptr, info)) - return _generic_info_to_python(info) + result = _generic_info_to_python(info) + + self._wg_info_cache[param, device] = result + return result def get_arg_info(self, idx, param): info = _ffi.new('generic_info*') @@ -1162,6 +1656,24 @@ class Event(_Common): _ffi.new_handle(_func))) +class ProfilingInfoGetter: + def __init__(self, event): + self.event = event + + def __getattr__(self, name): + info_cls = profiling_info + + try: + inf_attr = getattr(info_cls, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_profiling_info(inf_attr) + +Event.profile = property(ProfilingInfoGetter) + + def wait_for_events(wait_for): _handle_error(_lib.wait_for_events(*_clobj_list(wait_for))) @@ -1788,6 +2300,23 @@ class ImageFormat(object): def itemsize(self): return self.channel_count * self.dtype_size + def __repr__(self): + return "ImageFormat(%s, %s)" % ( + channel_order.to_string(self.channel_order, + "<unknown channel order 0x%x>"), + channel_type.to_string(self.channel_data_type, + "<unknown channel data type 0x%x>")) + + def __eq__(self, other): + return (self.channel_order == other.channel_order + and self.channel_data_type == other.channel_data_type) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((type(self), self.channel_order, self.channel_data_type)) + def get_supported_image_formats(context, flags, image_type): info = _ffi.new('generic_info*') @@ -1887,7 +2416,7 @@ _fill_dtype_dict = { class Image(MemoryObject): _id = 'image' - def __init__(self, *args): + def __init_dispatch(self, *args): if len(args) == 5: # >= 1.2 self.__init_1_2(*args) @@ -1960,11 +2489,122 @@ class Image(MemoryObject): raise LogicError("invalid dimension", status_code.INVALID_VALUE, "Image") + def __init__(self, context, flags, format, shape=None, pitches=None, + hostbuf=None, is_array=False, buffer=None): + + if shape is None and hostbuf is None: + raise Error("'shape' must be passed if 'hostbuf' is not given") + + if shape is None and hostbuf is not None: + shape = hostbuf.shape + + if hostbuf is not None and not \ + (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): + from warnings import warn + warn("'hostbuf' was passed, but no memory flags to make use of it.") + + if hostbuf is None and pitches is not None: + raise Error("'pitches' may only be given if 'hostbuf' is given") + + if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): + if buffer is not None and is_array: + raise ValueError( + "'buffer' and 'is_array' are mutually exclusive") + + if len(shape) == 3: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 3D arrays") + elif is_array: + image_type = mem_object_type.IMAGE2D_ARRAY + else: + image_type = mem_object_type.IMAGE3D + + elif len(shape) == 2: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 2D arrays") + elif is_array: + image_type = mem_object_type.IMAGE1D_ARRAY + else: + image_type = mem_object_type.IMAGE2D + + elif len(shape) == 1: + if buffer is not None: + image_type = mem_object_type.IMAGE1D_BUFFER + elif is_array: + raise TypeError("array of zero-dimensional images not supported") + else: + image_type = mem_object_type.IMAGE1D + + else: + raise ValueError("images cannot have more than three dimensions") + + desc = ImageDescriptor() + + desc.image_type = image_type + desc.shape = shape # also sets desc.array_size + + if pitches is None: + desc.pitches = (0, 0) + else: + desc.pitches = pitches + + desc.num_mip_levels = 0 # per CL 1.2 spec + desc.num_samples = 0 # per CL 1.2 spec + desc.buffer = buffer + + self.__init_dispatch(context, flags, format, desc, hostbuf) + else: + # legacy init for CL 1.1 and older + if is_array: + raise TypeError("'is_array=True' is not supported for CL < 1.2") + # if num_mip_levels is not None: + # raise TypeError( + # "'num_mip_levels' argument is not supported for CL < 1.2") + # if num_samples is not None: + # raise TypeError( + # "'num_samples' argument is not supported for CL < 1.2") + if buffer is not None: + raise TypeError("'buffer' argument is not supported for CL < 1.2") + + self.__init_dispatch(context, flags, format, shape, + pitches, hostbuf) + def get_image_info(self, param): info = _ffi.new('generic_info*') _handle_error(_lib.image__get_image_info(self.ptr, param, info)) return _generic_info_to_python(info) + @property + def shape(self): + if self.type == mem_object_type.IMAGE2D: + return (self.width, self.height) + elif self.type == mem_object_type.IMAGE3D: + return (self.width, self.height, self.depth) + else: + raise LogicError("only images have shapes") + + +class _ImageInfoGetter: + def __init__(self, event): + from warnings import warn + warn("Image.image.attr is deprecated. " + "Use Image.attr directly, instead.") + + self.event = event + + def __getattr__(self, name): + try: + inf_attr = getattr(image_info, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_image_info(inf_attr) + +Image.info = property(_ImageInfoGetter) + # }}} @@ -2044,4 +2684,66 @@ class DeviceTopologyAmd(object): # }}} + +# {{{ get_info monkeypatchery + +def add_get_info_attrs(cls, info_method, info_class, cacheable_attrs=None): + if cacheable_attrs is None: + cacheable_attrs = [] + + def make_getinfo(info_method, info_name, info_attr): + def result(self): + return info_method(self, info_attr) + + return property(result) + + def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): + def result(self): + try: + return getattr(self, cache_attr) + except AttributeError: + pass + + result = info_method(self, info_attr) + setattr(self, cache_attr, result) + return result + + return property(result) + + for info_name, info_value in six.iteritems(info_class.__dict__): + if info_name == "to_string" or info_name.startswith("_"): + continue + + info_lower = info_name.lower() + info_constant = getattr(info_class, info_name) + if info_name in cacheable_attrs: + cache_attr = intern("_info_cache_"+info_lower) + setattr(cls, info_lower, make_cacheable_getinfo( + info_method, info_lower, cache_attr, info_constant)) + else: + setattr(cls, info_lower, make_getinfo( + info_method, info_name, info_constant)) + +add_get_info_attrs(Platform, Platform.get_info, platform_info), +add_get_info_attrs(Device, Device.get_info, device_info, + ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]) +add_get_info_attrs(Context, Context.get_info, context_info) +add_get_info_attrs(CommandQueue, CommandQueue.get_info, command_queue_info, + ["CONTEXT", "DEVICE"]) +add_get_info_attrs(Event, Event.get_info, event_info) +add_get_info_attrs(MemoryObjectHolder, MemoryObjectHolder.get_info, mem_info) +add_get_info_attrs(Image, Image.get_image_info, image_info) +add_get_info_attrs(Kernel, Kernel.get_info, kernel_info) +add_get_info_attrs(Sampler, Sampler.get_info, sampler_info) + +# }}} + + +if have_gl(): + def gl_object_get_gl_object(self): + return self.get_gl_object_info()[1] + + GLBuffer.gl_object = property(gl_object_get_gl_object) + GLTexture.gl_object = property(gl_object_get_gl_object) + # vim: foldmethod=marker -- GitLab