From b25cb26ea2cf4cd1c476bb60c475bd68c7cfd13f Mon Sep 17 00:00:00 2001 From: Marko Bencun <mbencun@gmail.com> Date: Sat, 7 Sep 2013 20:50:41 +0200 Subject: [PATCH] fixed building/deploying of cffi extension --- pyopencl/__init__.py | 1159 +----------------------------------------- pyopencl/_cffi.py | 15 +- pyopencl/_init.py | 1156 +++++++++++++++++++++++++++++++++++++++++ pyopencl/cffi_cl.py | 55 +- setup.py | 13 +- 5 files changed, 1205 insertions(+), 1193 deletions(-) create mode 100644 pyopencl/_init.py diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 704dd7b4..161bf038 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -1,1156 +1,3 @@ -# -*- coding: utf-8 -*- - -__copyright__ = "Copyright (C) 2009 Andreas Kloeckner" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - -from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa -try: - import pyopencl.cffi_cl as _cl - #import pyopencl._cl as _cl -except ImportError: - import os - from os.path import dirname, join, realpath - if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)): - from warnings import warn - warn("It looks like you are importing PyOpenCL from " - "its source directory. This likely won't work.") - raise - -# _ccl = _cl -# import cffi_cl -# _cl = cffi_cl -import np -#from pyopencl._cl import * # noqa -from pyopencl.cffi_cl import * -import inspect as _inspect - -CONSTANT_CLASSES = [ - getattr(_cl, name) for name in dir(_cl) - if _inspect.isclass(getattr(_cl, name)) - and name[0].islower()] - -class CompilerWarning(UserWarning): - pass - - -def compiler_output(text): - import os - from warnings import warn - if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): - warn(text, CompilerWarning) - else: - warn("Non-empty compiler output encountered. Set the " - "environment variable PYOPENCL_COMPILER_OUTPUT=1 " - "to see more.", CompilerWarning) - - -# {{{ Program (including caching support) - -class Program(object): - def __init__(self, arg1, arg2=None, arg3=None): - if arg2 is None: - # 1-argument form: program - self._prg = arg1 - - elif arg3 is None: - # 2-argument form: context, source - context, source = arg1, arg2 - - import sys - if isinstance(source, unicode) and sys.version_info < (3,): - from warnings import warn - warn("Received OpenCL source code in Unicode, " - "should be ASCII string. Attempting conversion.", - stacklevel=2) - source = str(source) - - self._context = context - self._source = source - self._prg = None - - else: - # 3-argument form: context, devices, binaries - self._prg = _cl._Program(arg1, arg2, arg3) - - def _get_prg(self): - if self._prg is not None: - return self._prg - else: - # "no program" can only happen in from-source case. 
- from warnings import warn - warn("Pre-build attribute access defeats compiler caching.", - stacklevel=3) - - self._prg = _cl._Program(self._context, self._source) - del self._context - return self._prg - - def get_info(self, arg): - return self._get_prg().get_info(arg) - - def get_build_info(self, *args, **kwargs): - return self._get_prg().get_build_info(*args, **kwargs) - - def all_kernels(self): - return self._get_prg().all_kernels() - - def int_ptr(self): - return self._get_prg().int_ptr - int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) - - def from_int_ptr(int_ptr_value): - return Program(_cl._Program.from_int_ptr(int_ptr_value)) - from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ - from_int_ptr = staticmethod(from_int_ptr) - - def __getattr__(self, attr): - try: - knl = Kernel(self, attr) - # Nvidia does not raise errors even for invalid names, - # but this will give an error if the kernel is invalid. - knl.num_args - knl._source = getattr(self, "_source", None) - return knl - except LogicError: - raise AttributeError("'%s' was not found as a program " - "info attribute or as a kernel name" % attr) - - # {{{ build - - def build(self, options=[], devices=None, cache_dir=None): - if isinstance(options, str): - options = [options] - - options = options + ["-I", _find_pyopencl_include_path()] - - import os - forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS") - if forced_options: - options = options + forced_options.split() - - if os.environ.get("PYOPENCL_NO_CACHE") and self._prg is None: - self._prg = _cl._Program(self._context, self._source) - if self._prg is not None: - # uncached - self._build_and_catch_errors( - lambda: self._prg.build(" ".join(options), devices), - options=options) - - else: - # cached - from pyopencl.cache import create_built_program_from_source_cached - self._prg = self._build_and_catch_errors( - lambda: create_built_program_from_source_cached( - self._context, self._source, options, devices, - 
cache_dir=cache_dir), - options=options, source=self._source) - - del self._context - - return self - - def _build_and_catch_errors(self, build_func, options, source=None): - try: - return build_func() - except _cl.RuntimeError, e: - what = e.what - if options: - what = what + "\n(options: %s)" % " ".join(options) - - if source is not None: - from tempfile import NamedTemporaryFile - srcfile = NamedTemporaryFile(mode="wt", delete=False, suffix=".cl") - try: - srcfile.write(source) - finally: - srcfile.close() - - what = what + "\n(source saved as %s)" % srcfile.name - - code = e.code - routine = e.routine - - err = _cl.RuntimeError(routine, code, what) - - # Python 3.2 outputs the whole list of currently active exceptions - # This serves to remove one (redundant) level from that nesting. - raise err - - # }}} - - def compile(self, options=[], devices=None, headers=[]): - options = " ".join(options) - return self._prg().compile(options, devices, headers) - - def __eq__(self, other): - return self._get_prg() == other._get_prg() - - def __ne__(self, other): - return self._get_prg() == other._get_prg() - - def __hash__(self): - return hash(self._get_prg()) - - -def create_program_with_built_in_kernels(context, devices, kernel_names): - if not isinstance(kernel_names, str): - kernel_names = ":".join(kernel_names) - - return Program(_Program.create_with_built_in_kernels( - context, devices, kernel_names)) - - -def link_program(context, programs, options=[], devices=None): - options = " ".join(options) - return Program(_Program.link(context, programs, options, devices)) - -# }}} - -def _add_functionality(): - cls_to_info_cls = { - Platform: - (Platform.get_info, platform_info), - Device: - (Device.get_info, device_info), - Context: - (Context.get_info, context_info), - CommandQueue: - (CommandQueue.get_info, command_queue_info), - Event: - (Event.get_info, event_info), - MemoryObjectHolder: - (MemoryObjectHolder.get_info, mem_info), - # Image: - # (Image.get_image_info, 
image_info), - Program: - (Program.get_info, program_info), - Kernel: - (Kernel.get_info, kernel_info), - # Sampler: - # (Sampler.get_info, sampler_info), - } - - def to_string(cls, value, default_format=None): - for name in dir(cls): - if (not name.startswith("_") and getattr(cls, name) == value): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - for cls in CONSTANT_CLASSES: - cls.to_string = classmethod(to_string) - - # {{{ get_info attributes ------------------------------------------------- - - def make_getinfo(info_method, info_attr): - def result(self): - return info_method(self, info_attr) - - return property(result) - - for cls, (info_method, info_class) in cls_to_info_cls.iteritems(): - for info_name, info_value in info_class.__dict__.iteritems(): - if info_name == "to_string" or info_name.startswith("_"): - continue - setattr(cls, info_name.lower(), make_getinfo( - info_method, getattr(info_class, info_name))) - # }}} - - # {{{ Platform - - def platform_repr(self): - return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) - - Platform.__repr__ = platform_repr - - # }}} - - # {{{ Device - - def device_repr(self): - return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( - self.name.strip(), self.platform.name.strip(), self.int_ptr) - - Device.__repr__ = device_repr - - # }}} - - # {{{ Context - - def context_repr(self): - return "<pyopencl.Context at TODO on %s>" % (", ".join(repr(dev) for dev in self.devices)) - # return "<pyopencl.Context at 0x%x on %s>" % (self.obj_ptr, - # ", ".join(repr(dev) for dev in self.devices)) - - def context_get_cl_version(self): - import re - platform = self.devices[0].platform - plat_version_string = platform.version - match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", - plat_version_string) - if match is None: - raise RuntimeError("platform %s returned non-conformant " - "platform version 
string '%s'" % (platform, plat_version_string)) - - return int(match.group(1)), int(match.group(2)) - - Context.__repr__ = context_repr - from pytools import memoize_method - Context._get_cl_version = memoize_method(context_get_cl_version) - - # }}} - - # {{{ CommandQueue - - def command_queue_enter(self): - return self - - def command_queue_exit(self, exc_type, exc_val, exc_tb): - self.finish() - - def command_queue_get_cl_version(self): - return self.context._get_cl_version() - - CommandQueue.__enter__ = command_queue_enter - CommandQueue.__exit__ = command_queue_exit - CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version) - - # }}} - - # {{{ _Program (the internal, non-caching version) - - def program_get_build_logs(self): - build_logs = [] - for dev in self.get_info(_cl.program_info.DEVICES): - try: - log = self.get_build_info(dev, program_build_info.LOG) - except: - log = "<error retrieving log>" - - build_logs.append((dev, log)) - - return build_logs - - def program_build(self, options=[], devices=None): - if isinstance(options, list): - options = " ".join(options) - - err = None - try: - self._build(options=options, devices=devices) - except Exception, e: - what = e.what + "\n\n" + (75*"="+"\n").join( - "Build on %s:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs()) - code = e.code - routine = e.routine - - err = _cl.RuntimeError(routine, code, what) - - if err is not None: - # Python 3.2 outputs the whole list of currently active exceptions - # This serves to remove one (redundant) level from that nesting. 
- raise err - - message = (75*"="+"\n").join( - "Build on %s succeeded, but said:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs() - if log is not None and log.strip()) - - if message: - if self.kind() == program_kind.SOURCE: - build_type = "From-source build" - elif self.kind() == program_kind.BINARY: - build_type = "From-binary build" - else: - build_type = "Build" - - compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" - % (build_type, message)) - - return self - - _cl._Program._get_build_logs = program_get_build_logs - _cl._Program.build = program_build - - # }}} - - # {{{ Event - class ProfilingInfoGetter: - def __init__(self, event): - self.event = event - - def __getattr__(self, name): - info_cls = _cl.profiling_info - - try: - inf_attr = getattr(info_cls, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_profiling_info(inf_attr) - - _cl.Event.profile = property(ProfilingInfoGetter) - - # }}} - - # {{{ Kernel - - kernel_old_init = Kernel.__init__ - - def kernel_init(self, prg, name): - if not isinstance(prg, _cl._Program): - prg = prg._get_prg() - - kernel_old_init(self, prg, name) - self._source = getattr(prg, "_source", None) - - def kernel_call(self, queue, global_size, local_size, *args, **kwargs): - global_offset = kwargs.pop("global_offset", None) - g_times_l = kwargs.pop("g_times_l", False) - wait_for = kwargs.pop("wait_for", None) - - if kwargs: - raise TypeError( - "Kernel.__call__ recived unexpected keyword arguments: %s" - % ", ".join(kwargs.keys())) - - self.set_args(*args) - - return enqueue_nd_range_kernel(queue, self, global_size, local_size, - global_offset, wait_for, g_times_l=g_times_l) - - def kernel_set_scalar_arg_dtypes(self, arg_dtypes): - assert len(arg_dtypes) == self.num_args, ( - "length of argument type array (%d) and " - "CL-generated number of arguments (%d) do not agree" - % (len(arg_dtypes), self.num_args)) 
- - arg_type_chars = [] - - for arg_dtype in arg_dtypes: - if arg_dtype is None: - arg_type_chars.append(None) - else: - arg_type_chars.append(np.dtype(arg_dtype).char) - - self._arg_type_chars = arg_type_chars - - def kernel_set_args(self, *args): - assert len(args) == self.num_args, ( - "length of argument list (%d) and " - "CL-generated number of arguments (%d) do not agree" - % (len(args), self.num_args)) - - i = None - try: - try: - arg_type_chars = self.__dict__["_arg_type_chars"] - except KeyError: - for i, arg in enumerate(args): - self.set_arg(i, arg) - else: - from pyopencl._pvt_struct import pack - - for i, (arg, arg_type_char) in enumerate( - zip(args, arg_type_chars)): - if arg_type_char and arg_type_char != "V": - self.set_arg(i, pack(arg_type_char, arg)) - else: - self.set_arg(i, arg) - except LogicError, e: - if i is not None: - advice = "" - from pyopencl.array import Array - if isinstance(args[i], Array): - advice = " (perhaps you meant to pass 'array.data' " \ - "instead of the array itself?)" - - raise LogicError( - "when processing argument #%d (1-based): %s%s" - % (i+1, str(e), advice)) - else: - raise - - def kernel_capture_call(self, filename, queue, global_size, local_size, - *args, **kwargs): - from pyopencl.capture_call import capture_kernel_call - capture_kernel_call(self, filename, queue, global_size, local_size, - *args, **kwargs) - - Kernel.__init__ = kernel_init - Kernel.__call__ = kernel_call - Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes - Kernel.set_args = kernel_set_args - Kernel.capture_call = kernel_capture_call - - # }}} - - # # {{{ ImageFormat - - # def image_format_repr(self): - # return "ImageFormat(%s, %s)" % ( - # channel_order.to_string(self.channel_order, - # "<unknown channel order 0x%x>"), - # channel_type.to_string(self.channel_data_type, - # "<unknown channel data type 0x%x>")) - - # def image_format_eq(self, other): - # return (self.channel_order == other.channel_order - # and self.channel_data_type 
== other.channel_data_type) - - # def image_format_ne(self, other): - # return not image_format_eq(self, other) - - # def image_format_hash(self): - # return hash((type(self), self.channel_order, self.channel_data_type)) - - # ImageFormat.__repr__ = image_format_repr - # ImageFormat.__eq__ = image_format_eq - # ImageFormat.__ne__ = image_format_ne - # ImageFormat.__hash__ = image_format_hash - - # # }}} - - # # {{{ Image - - # image_old_init = Image.__init__ - - # def image_init(self, context, flags, format, shape=None, pitches=None, - # hostbuf=None, is_array=False, buffer=None): - - # if shape is None and hostbuf is None: - # raise Error("'shape' must be passed if 'hostbuf' is not given") - - # if shape is None and hostbuf is not None: - # shape = hostbuf.shape - - # if hostbuf is not None and not \ - # (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - # from warnings import warn - # warn("'hostbuf' was passed, but no memory flags to make use of it.") - - # if hostbuf is None and pitches is not None: - # raise Error("'pitches' may only be given if 'hostbuf' is given") - - # if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): - # if buffer is not None and is_array: - # raise ValueError( - # "'buffer' and 'is_array' are mutually exclusive") - - # if len(shape) == 3: - # if buffer is not None: - # raise TypeError( - # "'buffer' argument is not supported for 3D arrays") - # elif is_array: - # image_type = mem_object_type.IMAGE2D_ARRAY - # else: - # image_type = mem_object_type.IMAGE3D - - # elif len(shape) == 2: - # if buffer is not None: - # raise TypeError( - # "'buffer' argument is not supported for 2D arrays") - # elif is_array: - # image_type = mem_object_type.IMAGE1D_ARRAY - # else: - # image_type = mem_object_type.IMAGE2D - - # elif len(shape) == 1: - # if buffer is not None: - # image_type = mem_object_type.IMAGE1D_BUFFER - # elif is_array: - # raise TypeError("array of zero-dimensional images not supported") - # else: 
- # image_type = mem_object_type.IMAGE1D - - # else: - # raise ValueError("images cannot have more than three dimensions") - - # desc = ImageDescriptor() - - # desc.image_type = image_type - # desc.shape = shape # also sets desc.array_size - - # if pitches is None: - # desc.pitches = (0, 0) - # else: - # desc.pitches = pitches - - # desc.num_mip_levels = 0 # per CL 1.2 spec - # desc.num_samples = 0 # per CL 1.2 spec - # desc.buffer = buffer - - # image_old_init(self, context, flags, format, desc, hostbuf) - # else: - # # legacy init for CL 1.1 and older - # if is_array: - # raise TypeError("'is_array=True' is not supported for CL < 1.2") - # #if num_mip_levels is not None: - # #raise TypeError( - # # "'num_mip_levels' argument is not supported for CL < 1.2") - # #if num_samples is not None: - # #raise TypeError( - # # "'num_samples' argument is not supported for CL < 1.2") - # if buffer is not None: - # raise TypeError("'buffer' argument is not supported for CL < 1.2") - - # image_old_init(self, context, flags, format, shape, - # pitches, hostbuf) - - # class _ImageInfoGetter: - # def __init__(self, event): - # from warnings import warn - # warn("Image.image.attr is deprecated. 
" - # "Use Image.attr directly, instead.") - - # self.event = event - - # def __getattr__(self, name): - # try: - # inf_attr = getattr(_cl.image_info, name.upper()) - # except AttributeError: - # raise AttributeError("%s has no attribute '%s'" - # % (type(self), name)) - # else: - # return self.event.get_image_info(inf_attr) - - # def image_shape(self): - # if self.type == mem_object_type.IMAGE2D: - # return (self.width, self.height) - # elif self.type == mem_object_type.IMAGE3D: - # return (self.width, self.height, self.depth) - # else: - # raise LogicError("only images have shapes") - - # Image.__init__ = image_init - # Image.image = property(_ImageInfoGetter) - # Image.shape = property(image_shape) - - # # }}} - - # # {{{ Error - - # def error_str(self): - # val = self.args[0] - # try: - # val.routine - # except AttributeError: - # return str(val) - # else: - # result = "%s failed: %s" % (val.routine(), - # status_code.to_string(val.code(), "<unknown error %d>") - # .lower().replace("_", " ")) - # if val.what(): - # result += " - " + val.what() - # return result - - # def error_code(self): - # return self.args[0].code() - - # def error_routine(self): - # return self.args[0].routine() - - # def error_what(self): - # return self.args[0].what() - - # Error.__str__ = error_str - # Error.code = property(error_code) - # Error.routine = property(error_routine) - # Error.what = property(error_what) - - # # }}} - - # if _cl.have_gl(): - # def gl_object_get_gl_object(self): - # return self.get_gl_object_info()[1] - - # GLBuffer.gl_object = property(gl_object_get_gl_object) - # GLTexture.gl_object = property(gl_object_get_gl_object) - -_add_functionality() - - -# {{{ find pyopencl shipped source code - -def _find_pyopencl_include_path(): - from pkg_resources import Requirement, resource_filename - return resource_filename(Requirement.parse("pyopencl"), "pyopencl/cl") - -# }}} - - -# {{{ convenience - -def create_some_context(interactive=True, answers=None): - import os - 
if answers is None and "PYOPENCL_CTX" in os.environ: - ctx_spec = os.environ["PYOPENCL_CTX"] - answers = ctx_spec.split(":") - - if answers is not None: - pre_provided_answers = answers - answers = answers[:] - else: - pre_provided_answers = None - - user_inputs = [] - - try: - import sys - if not sys.stdin.isatty(): - interactive = False - except: - interactive = False - - def cc_print(s): - if interactive: - print s - - def get_input(prompt): - if answers: - return str(answers.pop(0)) - elif not interactive: - return '' - else: - user_input = raw_input(prompt) - user_inputs.append(user_input) - return user_input - - # {{{ pick a platform - - platforms = get_platforms() - - if not platforms: - raise Error("no platforms found") - elif len(platforms) == 1: - platform, = platforms - else: - if not answers: - cc_print("Choose platform:") - for i, pf in enumerate(platforms): - cc_print("[%d] %s" % (i, pf)) - - answer = get_input("Choice [0]:") - if not answer: - platform = platforms[0] - else: - platform = None - try: - int_choice = int(answer) - except ValueError: - pass - else: - if 0 <= int_choice < len(platforms): - platform = platforms[int_choice] - - if platform is None: - answer = answer.lower() - for i, pf in enumerate(platforms): - if answer in pf.name.lower(): - platform = pf - if platform is None: - raise RuntimeError("input did not match any platform") - - # }}} - - # {{{ pick a device - - devices = platform.get_devices() - - def parse_device(choice): - try: - int_choice = int(choice) - except ValueError: - pass - else: - if 0 <= int_choice < len(devices): - return devices[int_choice] - - choice = choice.lower() - for i, dev in enumerate(devices): - if choice in dev.name.lower(): - return dev - raise RuntimeError("input did not match any device") - - if not devices: - raise Error("no devices found") - elif len(devices) == 1: - pass - else: - if not answers: - cc_print("Choose device(s):") - for i, dev in enumerate(devices): - cc_print("[%d] %s" % (i, dev)) 
- - answer = get_input("Choice, comma-separated [0]:") - if not answer: - devices = [devices[0]] - else: - devices = [parse_device(i) for i in answer.split(",")] - - # }}} - - if user_inputs: - if pre_provided_answers is not None: - user_inputs = pre_provided_answers + user_inputs - cc_print("Set the environment variable PYOPENCL_CTX='%s' to " - "avoid being asked again." % ":".join(user_inputs)) - - if answers: - raise RuntimeError("not all provided choices were used by " - "create_some_context. (left over: '%s')" % ":".join(answers)) - - return Context(devices) - -_csc = create_some_context - - -def _mark_copy_deprecated(func): - def new_func(*args, **kwargs): - from warnings import warn - warn("'%s' has been deprecated in version 2011.1. Please use " - "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning, - stacklevel=2) - return func(*args, **kwargs) - - try: - from functools import update_wrapper - except ImportError: - pass - else: - try: - update_wrapper(new_func, func) - except AttributeError: - pass - - return new_func - - -# enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image) -# enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image) -# enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image) -# enqueue_copy_image_to_buffer = _mark_copy_deprecated( -# _cl._enqueue_copy_image_to_buffer) -# enqueue_copy_buffer_to_image = _mark_copy_deprecated( -# _cl._enqueue_copy_buffer_to_image) -enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer) -enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer) -enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer) - - -# if _cl.get_cl_header_version() >= (1, 1): -# enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect) -# enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect) -# enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect) - - -def 
enqueue_copy(queue, dest, src, **kwargs): - """Copy from :class:`Image`, :class:`Buffer` or the host to - :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host - copies are unsupported.) - - The following keyword arguments are available: - - :arg wait_for: (optional, default empty) - :arg is_blocking: Wait for completion. Defaults to *True*. - (Available on any copy involving host memory) - - :return: A :class:`NannyEvent` if the transfer involved a - host-side buffer, otherwise an :class:`Event`. - - .. ------------------------------------------------------------------------ - .. rubric :: Transfer :class:`Buffer` ↔ host - .. ------------------------------------------------------------------------ - - :arg device_offset: offset in bytes (optional) - - .. note:: - - The size of the transfer is controlled by the size of the - of the host-side buffer. If the host-side buffer - is a :class:`numpy.ndarray`, you can control the transfer size by - transfering into a smaller 'view' of the target array, like this:: - - cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer) - - .. ------------------------------------------------------------------------ - .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer` - .. ------------------------------------------------------------------------ - - :arg byte_count: (optional) If not specified, defaults to the - size of the source in versions 2012.x and earlier, - and to the minimum of the size of the source and target - from 2013.1 on. - :arg src_offset: (optional) - :arg dest_offset: (optional) - - .. ------------------------------------------------------------------------ - .. rubric :: Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer) - .. ------------------------------------------------------------------------ - - :arg buffer_origin: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg host_origin: :class:`tuple` of :class:`int` of length - three or shorter. 
(mandatory) - :arg region: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg buffer_pitches: :class:`tuple` of :class:`int` of length - two or shorter. (optional, "tightly-packed" if unspecified) - :arg host_pitches: :class:`tuple` of :class:`int` of length - two or shorter. (optional, "tightly-packed" if unspecified) - - .. ------------------------------------------------------------------------ - .. rubric :: Transfer :class:`Image` ↔ host - .. ------------------------------------------------------------------------ - - :arg origin: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg region: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg pitches: :class:`tuple` of :class:`int` of length - two or shorter. (optional) - - .. ------------------------------------------------------------------------ - .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image` - .. ------------------------------------------------------------------------ - - :arg offset: offset in buffer (mandatory) - :arg origin: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg region: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - - .. ------------------------------------------------------------------------ - .. rubric :: Transfer :class:`Image` ↔ :class:`Image` - .. ------------------------------------------------------------------------ - - :arg src_origin: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg dest_origin: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - :arg region: :class:`tuple` of :class:`int` of length - three or shorter. (mandatory) - - |std-enqueue-blurb| - - .. 
versionadded:: 2011.1 - """ - - if isinstance(dest, MemoryObjectHolder): - if dest.type == mem_object_type.BUFFER: - if isinstance(src, MemoryObjectHolder): - if src.type == mem_object_type.BUFFER: - if "src_origin" in kwargs: - return _cl._enqueue_copy_buffer_rect( - queue, src, dest, **kwargs) - else: - kwargs["dst_offset"] = kwargs.pop("dest_offset", 0) - return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs) - elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: - return _cl._enqueue_copy_image_to_buffer( - queue, src, dest, **kwargs) - else: - raise ValueError("invalid src mem object type") - else: - # assume from-host - if "buffer_origin" in kwargs: - return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs) - else: - return _cl._enqueue_write_buffer(queue, dest, src, **kwargs) - - elif dest.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: - if isinstance(src, MemoryObjectHolder): - if src.type == mem_object_type.BUFFER: - return _cl._enqueue_copy_buffer_to_image( - queue, src, dest, **kwargs) - elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: - return _cl._enqueue_copy_image(queue, src, dest, **kwargs) - else: - raise ValueError("invalid src mem object type") - else: - # assume from-host - origin = kwargs.pop("origin") - region = kwargs.pop("region") - - pitches = kwargs.pop("pitches", (0, 0)) - if len(pitches) == 1: - kwargs["row_pitch"], = pitches - else: - kwargs["row_pitch"], kwargs["slice_pitch"] = pitches - - return _cl._enqueue_write_image( - queue, dest, origin, region, src, **kwargs) - else: - raise ValueError("invalid dest mem object type") - - else: - # assume to-host - - if isinstance(src, MemoryObjectHolder): - if src.type == mem_object_type.BUFFER: - if "buffer_origin" in kwargs: - return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs) - else: - return _cl._enqueue_read_buffer(queue, src, dest, **kwargs) - elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: - 
origin = kwargs.pop("origin") - region = kwargs.pop("region") - - pitches = kwargs.pop("pitches", (0, 0)) - if len(pitches) == 1: - kwargs["row_pitch"], = pitches - else: - kwargs["row_pitch"], kwargs["slice_pitch"] = pitches - - return _cl._enqueue_read_image( - queue, src, origin, region, dest, **kwargs) - else: - raise ValueError("invalid src mem object type") - else: - # assume from-host - raise TypeError("enqueue_copy cannot perform host-to-host transfers") - -# }}} - -# {{{ image creation - -DTYPE_TO_CHANNEL_TYPE = { - np.dtype(np.float32): channel_type.FLOAT, - np.dtype(np.int16): channel_type.SIGNED_INT16, - np.dtype(np.int32): channel_type.SIGNED_INT32, - np.dtype(np.int8): channel_type.SIGNED_INT8, - np.dtype(np.uint16): channel_type.UNSIGNED_INT16, - np.dtype(np.uint32): channel_type.UNSIGNED_INT32, - np.dtype(np.uint8): channel_type.UNSIGNED_INT8, - } -try: - np.float16 -except: - pass -else: - DTYPE_TO_CHANNEL_TYPE[np.dtype(np.float16)] = channel_type.HALF_FLOAT, - -DTYPE_TO_CHANNEL_TYPE_NORM = { - np.dtype(np.int16): channel_type.SNORM_INT16, - np.dtype(np.int8): channel_type.SNORM_INT8, - np.dtype(np.uint16): channel_type.UNORM_INT16, - np.dtype(np.uint8): channel_type.UNORM_INT8, - } - - -def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False): - if not ary.flags.c_contiguous: - raise ValueError("array must be C-contiguous") - - dtype = ary.dtype - if num_channels is None: - - from pyopencl.array import vec - try: - dtype, num_channels = vec.type_to_scalar_and_count[dtype] - except KeyError: - # It must be a scalar type then. 
- num_channels = 1 - - shape = ary.shape - strides = ary.strides - - elif num_channels == 1: - shape = ary.shape - strides = ary.strides - else: - if ary.shape[-1] != num_channels: - raise RuntimeError("last dimension must be equal to number of channels") - - shape = ary.shape[:-1] - strides = ary.strides[:-1] - - if mode == "r": - mode_flags = mem_flags.READ_ONLY - elif mode == "w": - mode_flags = mem_flags.WRITE_ONLY - else: - raise ValueError("invalid value '%s' for 'mode'" % mode) - - img_format = { - 1: channel_order.R, - 2: channel_order.RG, - 3: channel_order.RGB, - 4: channel_order.RGBA, - }[num_channels] - - assert ary.strides[-1] == ary.dtype.itemsize - - if norm_int: - channel_type = DTYPE_TO_CHANNEL_TYPE_NORM[dtype] - else: - channel_type = DTYPE_TO_CHANNEL_TYPE[dtype] - - return Image(ctx, mode_flags | mem_flags.COPY_HOST_PTR, - ImageFormat(img_format, channel_type), - shape=shape[::-1], pitches=strides[::-1][1:], - hostbuf=ary) - -# }}} - - -# {{{ enqueue_* compatibility shims - -def enqueue_marker(queue, wait_for=None): - if queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): - return _cl._enqueue_marker_with_wait_list(queue, wait_for) - else: - if wait_for: - _cl._enqueue_wait_for_events(queue, wait_for) - return _cl._enqueue_marker(queue) - - -def enqueue_barrier(queue, wait_for=None): - if queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): - return _cl._enqueue_barrier_with_wait_list(queue, wait_for) - else: - _cl._enqueue_barrier(queue) - if wait_for: - _cl._enqueue_wait_for_events(queue, wait_for) - return _cl._enqueue_marker(queue) - - -def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): - if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)): - from warnings import warn - warn("The context for this queue does not declare OpenCL 1.2 support, so " - "the next thing you might see is a crash") - return _cl.enqueue_fill_buffer(queue, mem, pattern, offset, - 
size, wait_for=None) - - - -# }}} - - -# vim: foldmethod=marker +import os +if 'PYOPENCL_SETUP' not in os.environ: + from _init import * diff --git a/pyopencl/_cffi.py b/pyopencl/_cffi.py index 6a0159f1..8fc1aa29 100644 --- a/pyopencl/_cffi.py +++ b/pyopencl/_cffi.py @@ -83,21 +83,24 @@ with open(os.path.join(current_directory, 'wrap_cl_core.h')) as _f: _ffi.cdef('%s\n%s' % (_cl_header, _wrap_cl_header)) def _get_verifier(**kwargs): + # called by setup.py at build-time, with the relevant sources/include dirs/defines. # called by pyopencl at runtime with no kwargs, as we do not want to build at runtime, # but only get the cached version. - - from cffi.verifier import Verifier - return Verifier( - _ffi, + + _ffi.verify( """ #include <wrap_cl.h> """, - modulename='wrapcl', + # needs to be the same as ext_package in setup.py + ext_package='pyopencl', + modulename='cffi_wrapcl', **kwargs) + return _ffi.verifier + def _get_lib(): - # should + # is expected to return the library from cache return _ffi, _get_verifier().load_library() diff --git a/pyopencl/_init.py b/pyopencl/_init.py new file mode 100644 index 00000000..704dd7b4 --- /dev/null +++ b/pyopencl/_init.py @@ -0,0 +1,1156 @@ +# -*- coding: utf-8 -*- + +__copyright__ = "Copyright (C) 2009 Andreas Kloeckner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa +try: + import pyopencl.cffi_cl as _cl + #import pyopencl._cl as _cl +except ImportError: + import os + from os.path import dirname, join, realpath + if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)): + from warnings import warn + warn("It looks like you are importing PyOpenCL from " + "its source directory. This likely won't work.") + raise + +# _ccl = _cl +# import cffi_cl +# _cl = cffi_cl +import np +#from pyopencl._cl import * # noqa +from pyopencl.cffi_cl import * +import inspect as _inspect + +CONSTANT_CLASSES = [ + getattr(_cl, name) for name in dir(_cl) + if _inspect.isclass(getattr(_cl, name)) + and name[0].islower()] + +class CompilerWarning(UserWarning): + pass + + +def compiler_output(text): + import os + from warnings import warn + if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): + warn(text, CompilerWarning) + else: + warn("Non-empty compiler output encountered. 
Set the " + "environment variable PYOPENCL_COMPILER_OUTPUT=1 " + "to see more.", CompilerWarning) + + +# {{{ Program (including caching support) + +class Program(object): + def __init__(self, arg1, arg2=None, arg3=None): + if arg2 is None: + # 1-argument form: program + self._prg = arg1 + + elif arg3 is None: + # 2-argument form: context, source + context, source = arg1, arg2 + + import sys + if isinstance(source, unicode) and sys.version_info < (3,): + from warnings import warn + warn("Received OpenCL source code in Unicode, " + "should be ASCII string. Attempting conversion.", + stacklevel=2) + source = str(source) + + self._context = context + self._source = source + self._prg = None + + else: + # 3-argument form: context, devices, binaries + self._prg = _cl._Program(arg1, arg2, arg3) + + def _get_prg(self): + if self._prg is not None: + return self._prg + else: + # "no program" can only happen in from-source case. + from warnings import warn + warn("Pre-build attribute access defeats compiler caching.", + stacklevel=3) + + self._prg = _cl._Program(self._context, self._source) + del self._context + return self._prg + + def get_info(self, arg): + return self._get_prg().get_info(arg) + + def get_build_info(self, *args, **kwargs): + return self._get_prg().get_build_info(*args, **kwargs) + + def all_kernels(self): + return self._get_prg().all_kernels() + + def int_ptr(self): + return self._get_prg().int_ptr + int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) + + def from_int_ptr(int_ptr_value): + return Program(_cl._Program.from_int_ptr(int_ptr_value)) + from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ + from_int_ptr = staticmethod(from_int_ptr) + + def __getattr__(self, attr): + try: + knl = Kernel(self, attr) + # Nvidia does not raise errors even for invalid names, + # but this will give an error if the kernel is invalid. 
+ knl.num_args + knl._source = getattr(self, "_source", None) + return knl + except LogicError: + raise AttributeError("'%s' was not found as a program " + "info attribute or as a kernel name" % attr) + + # {{{ build + + def build(self, options=[], devices=None, cache_dir=None): + if isinstance(options, str): + options = [options] + + options = options + ["-I", _find_pyopencl_include_path()] + + import os + forced_options = os.environ.get("PYOPENCL_BUILD_OPTIONS") + if forced_options: + options = options + forced_options.split() + + if os.environ.get("PYOPENCL_NO_CACHE") and self._prg is None: + self._prg = _cl._Program(self._context, self._source) + if self._prg is not None: + # uncached + self._build_and_catch_errors( + lambda: self._prg.build(" ".join(options), devices), + options=options) + + else: + # cached + from pyopencl.cache import create_built_program_from_source_cached + self._prg = self._build_and_catch_errors( + lambda: create_built_program_from_source_cached( + self._context, self._source, options, devices, + cache_dir=cache_dir), + options=options, source=self._source) + + del self._context + + return self + + def _build_and_catch_errors(self, build_func, options, source=None): + try: + return build_func() + except _cl.RuntimeError, e: + what = e.what + if options: + what = what + "\n(options: %s)" % " ".join(options) + + if source is not None: + from tempfile import NamedTemporaryFile + srcfile = NamedTemporaryFile(mode="wt", delete=False, suffix=".cl") + try: + srcfile.write(source) + finally: + srcfile.close() + + what = what + "\n(source saved as %s)" % srcfile.name + + code = e.code + routine = e.routine + + err = _cl.RuntimeError(routine, code, what) + + # Python 3.2 outputs the whole list of currently active exceptions + # This serves to remove one (redundant) level from that nesting. 
+ raise err + + # }}} + + def compile(self, options=[], devices=None, headers=[]): + options = " ".join(options) + return self._get_prg().compile(options, devices, headers) + + def __eq__(self, other): + return self._get_prg() == other._get_prg() + + def __ne__(self, other): # must negate __eq__ + return self._get_prg() != other._get_prg() + + def __hash__(self): + return hash(self._get_prg()) + + +def create_program_with_built_in_kernels(context, devices, kernel_names): + if not isinstance(kernel_names, str): + kernel_names = ":".join(kernel_names) + + return Program(_Program.create_with_built_in_kernels( + context, devices, kernel_names)) + + +def link_program(context, programs, options=[], devices=None): + options = " ".join(options) + return Program(_Program.link(context, programs, options, devices)) + +# }}} + +def _add_functionality(): + cls_to_info_cls = { + Platform: + (Platform.get_info, platform_info), + Device: + (Device.get_info, device_info), + Context: + (Context.get_info, context_info), + CommandQueue: + (CommandQueue.get_info, command_queue_info), + Event: + (Event.get_info, event_info), + MemoryObjectHolder: + (MemoryObjectHolder.get_info, mem_info), + # Image: + # (Image.get_image_info, image_info), + Program: + (Program.get_info, program_info), + Kernel: + (Kernel.get_info, kernel_info), + # Sampler: + # (Sampler.get_info, sampler_info), + } + + def to_string(cls, value, default_format=None): + for name in dir(cls): + if (not name.startswith("_") and getattr(cls, name) == value): + return name + + if default_format is None: + raise ValueError("a name for value %d was not found in %s" + % (value, cls.__name__)) + else: + return default_format % value + + for cls in CONSTANT_CLASSES: + cls.to_string = classmethod(to_string) + + # {{{ get_info attributes ------------------------------------------------- + + def make_getinfo(info_method, info_attr): + def result(self): + return info_method(self, info_attr) + + return property(result) + + for cls, (info_method, info_class) in 
cls_to_info_cls.iteritems(): + for info_name, info_value in info_class.__dict__.iteritems(): + if info_name == "to_string" or info_name.startswith("_"): + continue + setattr(cls, info_name.lower(), make_getinfo( + info_method, getattr(info_class, info_name))) + # }}} + + # {{{ Platform + + def platform_repr(self): + return "<pyopencl.Platform '%s' at 0x%x>" % (self.name, self.int_ptr) + + Platform.__repr__ = platform_repr + + # }}} + + # {{{ Device + + def device_repr(self): + return "<pyopencl.Device '%s' on '%s' at 0x%x>" % ( + self.name.strip(), self.platform.name.strip(), self.int_ptr) + + Device.__repr__ = device_repr + + # }}} + + # {{{ Context + + def context_repr(self): + return "<pyopencl.Context at TODO on %s>" % (", ".join(repr(dev) for dev in self.devices)) + # return "<pyopencl.Context at 0x%x on %s>" % (self.obj_ptr, + # ", ".join(repr(dev) for dev in self.devices)) + + def context_get_cl_version(self): + import re + platform = self.devices[0].platform + plat_version_string = platform.version + match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", + plat_version_string) + if match is None: + raise RuntimeError("platform %s returned non-conformant " + "platform version string '%s'" % (platform, plat_version_string)) + + return int(match.group(1)), int(match.group(2)) + + Context.__repr__ = context_repr + from pytools import memoize_method + Context._get_cl_version = memoize_method(context_get_cl_version) + + # }}} + + # {{{ CommandQueue + + def command_queue_enter(self): + return self + + def command_queue_exit(self, exc_type, exc_val, exc_tb): + self.finish() + + def command_queue_get_cl_version(self): + return self.context._get_cl_version() + + CommandQueue.__enter__ = command_queue_enter + CommandQueue.__exit__ = command_queue_exit + CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version) + + # }}} + + # {{{ _Program (the internal, non-caching version) + + def program_get_build_logs(self): + build_logs = [] + for dev in 
self.get_info(_cl.program_info.DEVICES): + try: + log = self.get_build_info(dev, program_build_info.LOG) + except: + log = "<error retrieving log>" + + build_logs.append((dev, log)) + + return build_logs + + def program_build(self, options=[], devices=None): + if isinstance(options, list): + options = " ".join(options) + + err = None + try: + self._build(options=options, devices=devices) + except Exception, e: + what = e.what + "\n\n" + (75*"="+"\n").join( + "Build on %s:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs()) + code = e.code + routine = e.routine + + err = _cl.RuntimeError(routine, code, what) + + if err is not None: + # Python 3.2 outputs the whole list of currently active exceptions + # This serves to remove one (redundant) level from that nesting. + raise err + + message = (75*"="+"\n").join( + "Build on %s succeeded, but said:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs() + if log is not None and log.strip()) + + if message: + if self.kind() == program_kind.SOURCE: + build_type = "From-source build" + elif self.kind() == program_kind.BINARY: + build_type = "From-binary build" + else: + build_type = "Build" + + compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" + % (build_type, message)) + + return self + + _cl._Program._get_build_logs = program_get_build_logs + _cl._Program.build = program_build + + # }}} + + # {{{ Event + class ProfilingInfoGetter: + def __init__(self, event): + self.event = event + + def __getattr__(self, name): + info_cls = _cl.profiling_info + + try: + inf_attr = getattr(info_cls, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_profiling_info(inf_attr) + + _cl.Event.profile = property(ProfilingInfoGetter) + + # }}} + + # {{{ Kernel + + kernel_old_init = Kernel.__init__ + + def kernel_init(self, prg, name): + if not isinstance(prg, _cl._Program): + prg = prg._get_prg() + + 
kernel_old_init(self, prg, name) + self._source = getattr(prg, "_source", None) + + def kernel_call(self, queue, global_size, local_size, *args, **kwargs): + global_offset = kwargs.pop("global_offset", None) + g_times_l = kwargs.pop("g_times_l", False) + wait_for = kwargs.pop("wait_for", None) + + if kwargs: + raise TypeError( + "Kernel.__call__ recived unexpected keyword arguments: %s" + % ", ".join(kwargs.keys())) + + self.set_args(*args) + + return enqueue_nd_range_kernel(queue, self, global_size, local_size, + global_offset, wait_for, g_times_l=g_times_l) + + def kernel_set_scalar_arg_dtypes(self, arg_dtypes): + assert len(arg_dtypes) == self.num_args, ( + "length of argument type array (%d) and " + "CL-generated number of arguments (%d) do not agree" + % (len(arg_dtypes), self.num_args)) + + arg_type_chars = [] + + for arg_dtype in arg_dtypes: + if arg_dtype is None: + arg_type_chars.append(None) + else: + arg_type_chars.append(np.dtype(arg_dtype).char) + + self._arg_type_chars = arg_type_chars + + def kernel_set_args(self, *args): + assert len(args) == self.num_args, ( + "length of argument list (%d) and " + "CL-generated number of arguments (%d) do not agree" + % (len(args), self.num_args)) + + i = None + try: + try: + arg_type_chars = self.__dict__["_arg_type_chars"] + except KeyError: + for i, arg in enumerate(args): + self.set_arg(i, arg) + else: + from pyopencl._pvt_struct import pack + + for i, (arg, arg_type_char) in enumerate( + zip(args, arg_type_chars)): + if arg_type_char and arg_type_char != "V": + self.set_arg(i, pack(arg_type_char, arg)) + else: + self.set_arg(i, arg) + except LogicError, e: + if i is not None: + advice = "" + from pyopencl.array import Array + if isinstance(args[i], Array): + advice = " (perhaps you meant to pass 'array.data' " \ + "instead of the array itself?)" + + raise LogicError( + "when processing argument #%d (1-based): %s%s" + % (i+1, str(e), advice)) + else: + raise + + def kernel_capture_call(self, filename, queue, 
global_size, local_size, + *args, **kwargs): + from pyopencl.capture_call import capture_kernel_call + capture_kernel_call(self, filename, queue, global_size, local_size, + *args, **kwargs) + + Kernel.__init__ = kernel_init + Kernel.__call__ = kernel_call + Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes + Kernel.set_args = kernel_set_args + Kernel.capture_call = kernel_capture_call + + # }}} + + # # {{{ ImageFormat + + # def image_format_repr(self): + # return "ImageFormat(%s, %s)" % ( + # channel_order.to_string(self.channel_order, + # "<unknown channel order 0x%x>"), + # channel_type.to_string(self.channel_data_type, + # "<unknown channel data type 0x%x>")) + + # def image_format_eq(self, other): + # return (self.channel_order == other.channel_order + # and self.channel_data_type == other.channel_data_type) + + # def image_format_ne(self, other): + # return not image_format_eq(self, other) + + # def image_format_hash(self): + # return hash((type(self), self.channel_order, self.channel_data_type)) + + # ImageFormat.__repr__ = image_format_repr + # ImageFormat.__eq__ = image_format_eq + # ImageFormat.__ne__ = image_format_ne + # ImageFormat.__hash__ = image_format_hash + + # # }}} + + # # {{{ Image + + # image_old_init = Image.__init__ + + # def image_init(self, context, flags, format, shape=None, pitches=None, + # hostbuf=None, is_array=False, buffer=None): + + # if shape is None and hostbuf is None: + # raise Error("'shape' must be passed if 'hostbuf' is not given") + + # if shape is None and hostbuf is not None: + # shape = hostbuf.shape + + # if hostbuf is not None and not \ + # (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): + # from warnings import warn + # warn("'hostbuf' was passed, but no memory flags to make use of it.") + + # if hostbuf is None and pitches is not None: + # raise Error("'pitches' may only be given if 'hostbuf' is given") + + # if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): + # if 
buffer is not None and is_array: + # raise ValueError( + # "'buffer' and 'is_array' are mutually exclusive") + + # if len(shape) == 3: + # if buffer is not None: + # raise TypeError( + # "'buffer' argument is not supported for 3D arrays") + # elif is_array: + # image_type = mem_object_type.IMAGE2D_ARRAY + # else: + # image_type = mem_object_type.IMAGE3D + + # elif len(shape) == 2: + # if buffer is not None: + # raise TypeError( + # "'buffer' argument is not supported for 2D arrays") + # elif is_array: + # image_type = mem_object_type.IMAGE1D_ARRAY + # else: + # image_type = mem_object_type.IMAGE2D + + # elif len(shape) == 1: + # if buffer is not None: + # image_type = mem_object_type.IMAGE1D_BUFFER + # elif is_array: + # raise TypeError("array of zero-dimensional images not supported") + # else: + # image_type = mem_object_type.IMAGE1D + + # else: + # raise ValueError("images cannot have more than three dimensions") + + # desc = ImageDescriptor() + + # desc.image_type = image_type + # desc.shape = shape # also sets desc.array_size + + # if pitches is None: + # desc.pitches = (0, 0) + # else: + # desc.pitches = pitches + + # desc.num_mip_levels = 0 # per CL 1.2 spec + # desc.num_samples = 0 # per CL 1.2 spec + # desc.buffer = buffer + + # image_old_init(self, context, flags, format, desc, hostbuf) + # else: + # # legacy init for CL 1.1 and older + # if is_array: + # raise TypeError("'is_array=True' is not supported for CL < 1.2") + # #if num_mip_levels is not None: + # #raise TypeError( + # # "'num_mip_levels' argument is not supported for CL < 1.2") + # #if num_samples is not None: + # #raise TypeError( + # # "'num_samples' argument is not supported for CL < 1.2") + # if buffer is not None: + # raise TypeError("'buffer' argument is not supported for CL < 1.2") + + # image_old_init(self, context, flags, format, shape, + # pitches, hostbuf) + + # class _ImageInfoGetter: + # def __init__(self, event): + # from warnings import warn + # warn("Image.image.attr is 
deprecated. " + # "Use Image.attr directly, instead.") + + # self.event = event + + # def __getattr__(self, name): + # try: + # inf_attr = getattr(_cl.image_info, name.upper()) + # except AttributeError: + # raise AttributeError("%s has no attribute '%s'" + # % (type(self), name)) + # else: + # return self.event.get_image_info(inf_attr) + + # def image_shape(self): + # if self.type == mem_object_type.IMAGE2D: + # return (self.width, self.height) + # elif self.type == mem_object_type.IMAGE3D: + # return (self.width, self.height, self.depth) + # else: + # raise LogicError("only images have shapes") + + # Image.__init__ = image_init + # Image.image = property(_ImageInfoGetter) + # Image.shape = property(image_shape) + + # # }}} + + # # {{{ Error + + # def error_str(self): + # val = self.args[0] + # try: + # val.routine + # except AttributeError: + # return str(val) + # else: + # result = "%s failed: %s" % (val.routine(), + # status_code.to_string(val.code(), "<unknown error %d>") + # .lower().replace("_", " ")) + # if val.what(): + # result += " - " + val.what() + # return result + + # def error_code(self): + # return self.args[0].code() + + # def error_routine(self): + # return self.args[0].routine() + + # def error_what(self): + # return self.args[0].what() + + # Error.__str__ = error_str + # Error.code = property(error_code) + # Error.routine = property(error_routine) + # Error.what = property(error_what) + + # # }}} + + # if _cl.have_gl(): + # def gl_object_get_gl_object(self): + # return self.get_gl_object_info()[1] + + # GLBuffer.gl_object = property(gl_object_get_gl_object) + # GLTexture.gl_object = property(gl_object_get_gl_object) + +_add_functionality() + + +# {{{ find pyopencl shipped source code + +def _find_pyopencl_include_path(): + from pkg_resources import Requirement, resource_filename + return resource_filename(Requirement.parse("pyopencl"), "pyopencl/cl") + +# }}} + + +# {{{ convenience + +def create_some_context(interactive=True, answers=None): + 
import os + if answers is None and "PYOPENCL_CTX" in os.environ: + ctx_spec = os.environ["PYOPENCL_CTX"] + answers = ctx_spec.split(":") + + if answers is not None: + pre_provided_answers = answers + answers = answers[:] + else: + pre_provided_answers = None + + user_inputs = [] + + try: + import sys + if not sys.stdin.isatty(): + interactive = False + except: + interactive = False + + def cc_print(s): + if interactive: + print s + + def get_input(prompt): + if answers: + return str(answers.pop(0)) + elif not interactive: + return '' + else: + user_input = raw_input(prompt) + user_inputs.append(user_input) + return user_input + + # {{{ pick a platform + + platforms = get_platforms() + + if not platforms: + raise Error("no platforms found") + elif len(platforms) == 1: + platform, = platforms + else: + if not answers: + cc_print("Choose platform:") + for i, pf in enumerate(platforms): + cc_print("[%d] %s" % (i, pf)) + + answer = get_input("Choice [0]:") + if not answer: + platform = platforms[0] + else: + platform = None + try: + int_choice = int(answer) + except ValueError: + pass + else: + if 0 <= int_choice < len(platforms): + platform = platforms[int_choice] + + if platform is None: + answer = answer.lower() + for i, pf in enumerate(platforms): + if answer in pf.name.lower(): + platform = pf + if platform is None: + raise RuntimeError("input did not match any platform") + + # }}} + + # {{{ pick a device + + devices = platform.get_devices() + + def parse_device(choice): + try: + int_choice = int(choice) + except ValueError: + pass + else: + if 0 <= int_choice < len(devices): + return devices[int_choice] + + choice = choice.lower() + for i, dev in enumerate(devices): + if choice in dev.name.lower(): + return dev + raise RuntimeError("input did not match any device") + + if not devices: + raise Error("no devices found") + elif len(devices) == 1: + pass + else: + if not answers: + cc_print("Choose device(s):") + for i, dev in enumerate(devices): + cc_print("[%d] %s" 
% (i, dev)) + + answer = get_input("Choice, comma-separated [0]:") + if not answer: + devices = [devices[0]] + else: + devices = [parse_device(i) for i in answer.split(",")] + + # }}} + + if user_inputs: + if pre_provided_answers is not None: + user_inputs = pre_provided_answers + user_inputs + cc_print("Set the environment variable PYOPENCL_CTX='%s' to " + "avoid being asked again." % ":".join(user_inputs)) + + if answers: + raise RuntimeError("not all provided choices were used by " + "create_some_context. (left over: '%s')" % ":".join(answers)) + + return Context(devices) + +_csc = create_some_context + + +def _mark_copy_deprecated(func): + def new_func(*args, **kwargs): + from warnings import warn + warn("'%s' has been deprecated in version 2011.1. Please use " + "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning, + stacklevel=2) + return func(*args, **kwargs) + + try: + from functools import update_wrapper + except ImportError: + pass + else: + try: + update_wrapper(new_func, func) + except AttributeError: + pass + + return new_func + + +# enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image) +# enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image) +# enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image) +# enqueue_copy_image_to_buffer = _mark_copy_deprecated( +# _cl._enqueue_copy_image_to_buffer) +# enqueue_copy_buffer_to_image = _mark_copy_deprecated( +# _cl._enqueue_copy_buffer_to_image) +enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer) +enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer) +enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer) + + +# if _cl.get_cl_header_version() >= (1, 1): +# enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect) +# enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect) +# enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect) + + 
+def enqueue_copy(queue, dest, src, **kwargs): + """Copy from :class:`Image`, :class:`Buffer` or the host to + :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host + copies are unsupported.) + + The following keyword arguments are available: + + :arg wait_for: (optional, default empty) + :arg is_blocking: Wait for completion. Defaults to *True*. + (Available on any copy involving host memory) + + :return: A :class:`NannyEvent` if the transfer involved a + host-side buffer, otherwise an :class:`Event`. + + .. ------------------------------------------------------------------------ + .. rubric :: Transfer :class:`Buffer` ↔ host + .. ------------------------------------------------------------------------ + + :arg device_offset: offset in bytes (optional) + + .. note:: + + The size of the transfer is controlled by the size + of the host-side buffer. If the host-side buffer + is a :class:`numpy.ndarray`, you can control the transfer size by + transferring into a smaller 'view' of the target array, like this:: + + cl.enqueue_copy(queue, large_dest_numpy_array[:15], src_buffer) + + .. ------------------------------------------------------------------------ + .. rubric :: Transfer :class:`Buffer` ↔ :class:`Buffer` + .. ------------------------------------------------------------------------ + + :arg byte_count: (optional) If not specified, defaults to the + size of the source in versions 2012.x and earlier, + and to the minimum of the size of the source and target + from 2013.1 on. + :arg src_offset: (optional) + :arg dest_offset: (optional) + + .. ------------------------------------------------------------------------ + .. rubric :: Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer) + .. ------------------------------------------------------------------------ + + :arg buffer_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg host_origin: :class:`tuple` of :class:`int` of length + three or shorter. 
(mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg buffer_pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional, "tightly-packed" if unspecified) + :arg host_pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional, "tightly-packed" if unspecified) + + .. ------------------------------------------------------------------------ + .. rubric :: Transfer :class:`Image` ↔ host + .. ------------------------------------------------------------------------ + + :arg origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg pitches: :class:`tuple` of :class:`int` of length + two or shorter. (optional) + + .. ------------------------------------------------------------------------ + .. rubric :: Transfer :class:`Buffer` ↔ :class:`Image` + .. ------------------------------------------------------------------------ + + :arg offset: offset in buffer (mandatory) + :arg origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + + .. ------------------------------------------------------------------------ + .. rubric :: Transfer :class:`Image` ↔ :class:`Image` + .. ------------------------------------------------------------------------ + + :arg src_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg dest_origin: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + :arg region: :class:`tuple` of :class:`int` of length + three or shorter. (mandatory) + + |std-enqueue-blurb| + + .. 
versionadded:: 2011.1 + """ + + if isinstance(dest, MemoryObjectHolder): + if dest.type == mem_object_type.BUFFER: + if isinstance(src, MemoryObjectHolder): + if src.type == mem_object_type.BUFFER: + if "src_origin" in kwargs: + return _cl._enqueue_copy_buffer_rect( + queue, src, dest, **kwargs) + else: + kwargs["dst_offset"] = kwargs.pop("dest_offset", 0) + return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs) + elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: + return _cl._enqueue_copy_image_to_buffer( + queue, src, dest, **kwargs) + else: + raise ValueError("invalid src mem object type") + else: + # assume from-host + if "buffer_origin" in kwargs: + return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs) + else: + return _cl._enqueue_write_buffer(queue, dest, src, **kwargs) + + elif dest.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: + if isinstance(src, MemoryObjectHolder): + if src.type == mem_object_type.BUFFER: + return _cl._enqueue_copy_buffer_to_image( + queue, src, dest, **kwargs) + elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: + return _cl._enqueue_copy_image(queue, src, dest, **kwargs) + else: + raise ValueError("invalid src mem object type") + else: + # assume from-host + origin = kwargs.pop("origin") + region = kwargs.pop("region") + + pitches = kwargs.pop("pitches", (0, 0)) + if len(pitches) == 1: + kwargs["row_pitch"], = pitches + else: + kwargs["row_pitch"], kwargs["slice_pitch"] = pitches + + return _cl._enqueue_write_image( + queue, dest, origin, region, src, **kwargs) + else: + raise ValueError("invalid dest mem object type") + + else: + # assume to-host + + if isinstance(src, MemoryObjectHolder): + if src.type == mem_object_type.BUFFER: + if "buffer_origin" in kwargs: + return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs) + else: + return _cl._enqueue_read_buffer(queue, src, dest, **kwargs) + elif src.type in [mem_object_type.IMAGE2D, mem_object_type.IMAGE3D]: + 
origin = kwargs.pop("origin") + region = kwargs.pop("region") + + pitches = kwargs.pop("pitches", (0, 0)) + if len(pitches) == 1: + kwargs["row_pitch"], = pitches + else: + kwargs["row_pitch"], kwargs["slice_pitch"] = pitches + + return _cl._enqueue_read_image( + queue, src, origin, region, dest, **kwargs) + else: + raise ValueError("invalid src mem object type") + else: + # assume from-host + raise TypeError("enqueue_copy cannot perform host-to-host transfers") + +# }}} + +# {{{ image creation + +DTYPE_TO_CHANNEL_TYPE = { + np.dtype(np.float32): channel_type.FLOAT, + np.dtype(np.int16): channel_type.SIGNED_INT16, + np.dtype(np.int32): channel_type.SIGNED_INT32, + np.dtype(np.int8): channel_type.SIGNED_INT8, + np.dtype(np.uint16): channel_type.UNSIGNED_INT16, + np.dtype(np.uint32): channel_type.UNSIGNED_INT32, + np.dtype(np.uint8): channel_type.UNSIGNED_INT8, + } +try: + np.float16 +except AttributeError: # this numpy has no float16; skip the half-float entry + pass +else: + DTYPE_TO_CHANNEL_TYPE[np.dtype(np.float16)] = channel_type.HALF_FLOAT + +DTYPE_TO_CHANNEL_TYPE_NORM = { + np.dtype(np.int16): channel_type.SNORM_INT16, + np.dtype(np.int8): channel_type.SNORM_INT8, + np.dtype(np.uint16): channel_type.UNORM_INT16, + np.dtype(np.uint8): channel_type.UNORM_INT8, + } + + +def image_from_array(ctx, ary, num_channels=None, mode="r", norm_int=False): + if not ary.flags.c_contiguous: + raise ValueError("array must be C-contiguous") + + dtype = ary.dtype + if num_channels is None: + + from pyopencl.array import vec + try: + dtype, num_channels = vec.type_to_scalar_and_count[dtype] + except KeyError: + # It must be a scalar type then. 
+            num_channels = 1
+
+        shape = ary.shape
+        strides = ary.strides
+
+    elif num_channels == 1:
+        shape = ary.shape
+        strides = ary.strides
+    else:
+        if ary.shape[-1] != num_channels:
+            raise RuntimeError("last dimension must be equal to number of channels")
+
+        # The trailing axis holds the channels, so it is not an image axis.
+        shape = ary.shape[:-1]
+        strides = ary.strides[:-1]
+
+    if mode == "r":
+        mode_flags = mem_flags.READ_ONLY
+    elif mode == "w":
+        mode_flags = mem_flags.WRITE_ONLY
+    else:
+        raise ValueError("invalid value '%s' for 'mode'" % mode)
+
+    img_format = {
+            1: channel_order.R,
+            2: channel_order.RG,
+            3: channel_order.RGB,
+            4: channel_order.RGBA,
+            }[num_channels]
+
+    assert ary.strides[-1] == ary.dtype.itemsize
+
+    if norm_int:
+        channel_type = DTYPE_TO_CHANNEL_TYPE_NORM[dtype]
+    else:
+        channel_type = DTYPE_TO_CHANNEL_TYPE[dtype]
+
+    # Shape and strides are handed over in reversed axis order; the first
+    # reversed stride is dropped from the pitches.
+    return Image(ctx, mode_flags | mem_flags.COPY_HOST_PTR,
+            ImageFormat(img_format, channel_type),
+            shape=shape[::-1], pitches=strides[::-1][1:],
+            hostbuf=ary)
+
+# }}}
+
+
+# {{{ enqueue_* compatibility shims
+
+def enqueue_marker(queue, wait_for=None):
+    """Enqueue a marker on *queue* and return the result of the low-level call.
+
+    If both the queue and the CL headers report version 1.2 or newer,
+    *wait_for* is passed to ``_enqueue_marker_with_wait_list``. Otherwise a
+    non-empty *wait_for* is first handed to ``_enqueue_wait_for_events`` and
+    a plain marker is enqueued afterwards.
+    """
+    if queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
+        return _cl._enqueue_marker_with_wait_list(queue, wait_for)
+    else:
+        if wait_for:
+            _cl._enqueue_wait_for_events(queue, wait_for)
+        return _cl._enqueue_marker(queue)
+
+
+def enqueue_barrier(queue, wait_for=None):
+    """Enqueue a barrier on *queue* and return the result of the low-level call.
+
+    If both the queue and the CL headers report version 1.2 or newer,
+    *wait_for* is passed to ``_enqueue_barrier_with_wait_list``. Otherwise a
+    plain barrier is enqueued, a non-empty *wait_for* is handed to
+    ``_enqueue_wait_for_events``, and the value returned is that of a
+    subsequently enqueued marker.
+    """
+    if queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2):
+        return _cl._enqueue_barrier_with_wait_list(queue, wait_for)
+    else:
+        _cl._enqueue_barrier(queue)
+        if wait_for:
+            _cl._enqueue_wait_for_events(queue, wait_for)
+        return _cl._enqueue_marker(queue)
+
+
+def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
+    """Forward a buffer fill to ``_cl.enqueue_fill_buffer`` and return its result.
+
+    A warning is issued (but the call is still made) when the queue or the
+    CL headers report a version older than 1.2.
+
+    :arg wait_for: passed through unchanged to the low-level call.
+    """
+    if not (queue._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2)):
+        from warnings import warn
+        warn("The context for this queue does not declare OpenCL 1.2 support, so "
+                "the next thing you might see is a crash")
+    # Forward the caller's wait_for list rather than discarding it.
+    return _cl.enqueue_fill_buffer(queue, mem, pattern, offset,
+            size, wait_for=wait_for)
+
+
+
+# }}}
+
+
+# vim: foldmethod=marker
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index d6d8f69a..19b08cd2 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -4,7 +4,6 @@ import warnings _ffi, _lib = _get_lib() - bitlog2 = _lib.bitlog2 class _CArray(object): @@ -14,7 +13,7 @@ class _CArray(object): def __del__(self): _lib._free(self.ptr[0]) - + def __getitem__(self, key): return self.ptr[0].__getitem__(key) @@ -60,7 +59,7 @@ class Error(Exception): self.code = code self.what = msg super(Error, self).__init__(self, msg) - + class MemoryError(Error): pass class LogicError(Error): @@ -91,7 +90,7 @@ class _Common(object): def __del__(self): _lib._delete(self.ptr, self._c_class_type()) - + def __eq__(self, other): return hash(self) == hash(other) @@ -113,7 +112,7 @@ class _Common(object): _lib._from_int_ptr(ptr, int_ptr_value, getattr(_lib, 'CLASS_%s' % cls._id.upper())) #getattr(_lib, '%s__from_int_ptr' % cls._id)(ptr, int_ptr_value) return _create_instance(cls, ptr[0]) - + class Device(_Common): _id = 'device' @@ -133,7 +132,7 @@ def _parse_context_properties(properties): prop, value = prop_tuple if prop is None: raise RuntimeError("Context", status_code.INVALID_VALUE, "invalid context property") - + props.append(prop) if prop == context_properties.PLATFORM: props.append(value.int_ptr) @@ -157,14 +156,14 @@ def _parse_context_properties(properties): props.append(0) return _ffi.new('intptr_t[]', props) - + class Context(_Common): _id = 'context' def __init__(self, devices=None, properties=None, dev_type=None): c_props = _parse_context_properties(properties) status_code = _ffi.new('cl_int *') - + # from device list if devices is not None: if dev_type is not None: @@ -172,7 +171,7 @@ class Context(_Common): ptr_devices = _ffi.new('void*[]', [device.ptr for device in devices]) ptr_ctx = _ffi.new('void **') _handle_error(_lib._create_context(ptr_ctx, c_props, len(ptr_devices), _ffi.cast('void**', ptr_devices))) - + else: # 
from dev_type raise NotImplementedError() @@ -192,15 +191,15 @@ class MemoryObjectHolder(_Common): class MemoryObject(MemoryObjectHolder): pass - + class Buffer(MemoryObjectHolder): _id = 'buffer' - + @classmethod def _c_buffer_from_obj(cls, obj): # assume numpy array for now return _ffi.cast('void *', obj.__array_interface__['data'][0]), obj.nbytes - + def __init__(self, context, flags, size=0, hostbuf=None): if hostbuf is not None and not (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): warnings.warn("'hostbuf' was passed, but no memory flags to make use of it.") @@ -232,7 +231,7 @@ class _Program(_Common): def _init_binary(self, context, devices, binaries): if len(devices) != len(binaries): raise RuntimeError("create_program_with_binary", status_code.INVALID_VALUE, "device and binary counts don't match") - + ptr_program = _ffi.new('void **') ptr_devices = _ffi.new('void*[]', [device.ptr for device in devices]) ptr_binaries = [_ffi.new('char[%i]' % len(binary), binary) for binary in binaries] @@ -246,7 +245,7 @@ class _Program(_Common): len(ptr_binaries), _ffi.new('char*[]', ptr_binaries), binary_sizes)) - + self.ptr = ptr_program[0] def kind(self): @@ -264,7 +263,7 @@ class _Program(_Common): else: ptr_devices = _ffi.new('void*[]', [device.ptr for device in devices]) num_devices = len(devices) - + _handle_error(_lib.program__build(self.ptr, _ffi.new('char[]', options), num_devices, _ffi.cast('void**', ptr_devices))) @@ -272,7 +271,7 @@ class _Program(_Common): info = _ffi.new('generic_info *') _handle_error(_lib.program__get_build_info(self.ptr, device.ptr, param, info)) return _generic_info_to_python(info) - + class Platform(_Common): _id = 'platform' # todo: __del__ @@ -308,7 +307,7 @@ def _generic_info_to_python(info): from . 
import Program return Program(ins) return ins - + if type_.endswith(']'): ret = map(ci, value) _lib._free(info.value) @@ -335,12 +334,12 @@ def _generic_info_to_python(info): class Kernel(_Common): _id = 'kernel' - + def __init__(self, program, name): ptr_kernel = _ffi.new('void **') _handle_error(_lib._create_kernel(ptr_kernel, program.ptr, name)) self.ptr = ptr_kernel[0] - + def set_arg(self, arg_index, arg): if isinstance(arg, Buffer): _handle_error(_lib.kernel__set_arg_mem_buffer(self.ptr, arg_index, arg.ptr)) @@ -352,7 +351,7 @@ class Kernel(_Common): _handle_error(_lib.kernel__get_work_group_info(self.ptr, param, device.ptr, info)) return _generic_info_to_python(info) - + def get_platforms(): platforms = _CArray(_ffi.new('void**')) _handle_error(_lib.get_platforms(platforms.ptr, platforms.size)) @@ -361,7 +360,7 @@ def get_platforms(): # TODO why is the cast needed? platform_ptr = _ffi.cast('void**', platforms.ptr[0])[i] result.append(_create_instance(Platform, platform_ptr)) - + return result class Event(_Common): @@ -378,21 +377,21 @@ def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, gl if wait_for is not None: raise NotImplementedError("wait_for") work_dim = len(global_work_size) - + if local_work_size is not None: if g_times_l: work_dim = max(work_dim, len(local_work_size)) elif work_dim != len(local_work_size): raise RuntimeError("enqueue_nd_range_kernel", status_code.INVALID_VALUE, "global/local work sizes have differing dimensions") - + local_work_size = list(local_work_size) - + if len(local_work_size) < work_dim: local_work_size.extend([1] * (work_dim - len(local_work_size))) if len(global_work_size) < work_dim: global_work_size.extend([1] * (work_dim - len(global_work_size))) - + elif g_times_l: for i in xrange(work_dim): global_work_size[i] *= local_work_size[i] @@ -423,7 +422,7 @@ def _c_wait_for(wait_for=None): if wait_for is None: return _ffi.NULL, 0 return _ffi.new('void *[]', [ev.ptr for ev in wait_for]), 
len(wait_for) - + def _enqueue_read_buffer(queue, mem, buf, device_offset=0, wait_for=None, is_blocking=True): c_buf, size = Buffer._c_buffer_from_obj(buf) ptr_event = _ffi.new('void **') @@ -471,9 +470,9 @@ def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, wait_for=None, i )) return _create_instance(Event, ptr_event[0]) - + def _create_instance(cls, ptr): ins = cls.__new__(cls) ins.ptr = ptr return ins - + diff --git a/setup.py b/setup.py index e33a9406..931767da 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,9 @@ def get_config_schema(): def main(): + import os + os.environ['PYOPENCL_SETUP'] = '1' + from aksetup_helper import (hack_distutils, get_config, setup, NumpyExtension, check_git_submodules) @@ -160,6 +163,11 @@ def main(): shutil.copyfile("src/c_wrapper/wrap_cl_core.h", "pyopencl/wrap_cl_core.h") from pyopencl._cffi import _get_verifier + import os.path + current_directory = os.path.dirname(__file__) + + # for development: clean cache such that the extension is rebuilt + shutil.rmtree(os.path.join(current_directory, 'pyopencl', '__pycache__/'), ignore_errors=True) setup(name="pyopencl", # metadata @@ -207,7 +215,6 @@ def main(): ext_package="pyopencl", ext_modules=[ _get_verifier( - ext_package='pyopencl', # needs to be the same as above sources=[ "src/c_wrapper/wrap_cl.cpp", "src/c_wrapper/wrap_constants.cpp", @@ -220,9 +227,8 @@ def main(): define_macros=list(EXTRA_DEFINES.items()), extra_compile_args=conf["CXXFLAGS"], extra_link_args=conf["LDFLAGS"], - ).get_extension() - ], + ], include_package_data=True, package_data={ @@ -230,6 +236,7 @@ def main(): "cl/*.cl", "cl/*.h", "wrap_cl_core.h", + "_cl.so", ] }, -- GitLab