diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0047755fcbc3ec26d86b3c24075354534c00ef13..789cef1c8d1cab5708176863b8402655289c1305 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,9 +1,8 @@ -"Python 2.7 AMD CPU (+GL and special func)": +"Python 2.7 AMD CPU": script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=amd:pu - - export EXTRA_INSTALL="numpy mako scipy pyfmmlib" - - echo "CL_ENABLE_GL = True" > siteconf.py + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -18,7 +17,7 @@ Python 3.6 Intel CPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST="intel(r):pu" - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -32,7 +31,7 @@ Python 3.6 AMD CPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=amd:pu - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -46,7 +45,7 @@ Python 3.6 Titan X: script: - export PY_EXE=python3.5 - export PYOPENCL_TEST=nvi:titan - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -59,7 +58,7 @@ Python 3.6 K40: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=nvi:k40 - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". 
./build-and-test-py-project.sh" tags: @@ -72,7 +71,11 @@ Python 3.6 AMD GPU: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=amd:fiji - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" + + # https://andreask.cs.illinois.edu/MachineShop/UserNotes + - export OCL_ICD_VENDORS=/etc/OpenCLwithAMD/vendors + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" allow_failure: true @@ -82,25 +85,11 @@ Python 3.6 AMD GPU: except: - tags -Python 2.6 POCL CPU: - script: - - export PY_EXE=python2.6 - - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" - - export NO_DOCTESTS=1 - - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - - ". ./build-and-test-py-project.sh" - tags: - - python2.6 - - amd-cl-cpu - except: - - tags - Python 2.7 POCL: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -113,7 +102,7 @@ Python 3.7 POCL: script: - export PY_EXE=python3.7 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -126,7 +115,7 @@ Python 3.6 POCL CL 1.1: script: - export PY_EXE=python3.5 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - echo "CL_PRETEND_VERSION = '1.1'" > siteconf.py - ". 
./build-and-test-py-project.sh" @@ -140,7 +129,21 @@ Python 3.6 POCL: script: - export PY_EXE=python3.6 - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" + - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh + - ". ./build-and-test-py-project.sh" + tags: + - python3.6 + - pocl + except: + - tags + +Python 3.6 POCL (+GL and special functions): + script: + - export PY_EXE=python3.6 + - export PYOPENCL_TEST=portable + - export EXTRA_INSTALL="pybind11 numpy mako scipy pyfmmlib" + - echo "CL_ENABLE_GL = True" > siteconf.py - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" tags: @@ -153,7 +156,7 @@ Python 2.7 Apple: script: - export PY_EXE=python2.7 - export PYOPENCL_TEST=app:cpu - - export EXTRA_INSTALL="numpy mako" + - export EXTRA_INSTALL="pybind11 numpy mako" - export PKG_CONFIG_PATH=/usr/local/opt/libffi/lib/pkgconfig - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" @@ -178,10 +181,14 @@ PyPy POCL: script: - export PY_EXE=pypy - export PYOPENCL_TEST=portable - - export EXTRA_INSTALL="numpy mako" + + # https://github.com/pybind/pybind11/pull/1494 + - export EXTRA_INSTALL="git+https://github.com/inducer/pybind11 numpy mako" + - export NO_DOCTESTS=1 - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-and-test-py-project.sh - ". ./build-and-test-py-project.sh" + tags: - pypy - pocl @@ -190,7 +197,7 @@ PyPy POCL: Documentation: script: - - EXTRA_INSTALL="numpy mako" + - EXTRA_INSTALL="pybind11 numpy mako" - curl -L -O -k https://gitlab.tiker.net/inducer/ci-support/raw/master/build-docs.sh - ". 
./build-docs.sh" tags: diff --git a/.gitmodules b/.gitmodules index cb5a4e231848994b7d000813df14d87155613ece..779ec4875e9d74d3b4eef45f6349e551c3258339 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "pyopencl/compyte"] path = pyopencl/compyte url = https://github.com/inducer/compyte -[submodule "src/c_wrapper/mingw-std-threads"] - path = src/c_wrapper/mingw-std-threads - url = https://github.com/meganz/mingw-std-threads.git diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index a149bbe4cb8008125d3fe2a7dc029445b66a6ab4..b7824b0bb6b3cdbc4070215affce4e3ae7f1751d 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -9,3 +9,4 @@ dependencies: - pocl - osx-pocl-opencl - mako +- pybind11 diff --git a/cffi_build.py.in b/cffi_build.py.in deleted file mode 100644 index f948c8247a124ea40b3770c13da5d4fb14c6539c..0000000000000000000000000000000000000000 --- a/cffi_build.py.in +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import absolute_import, print_function - -__copyright__ = """ -Copyright (C) 2009-15 Andreas Kloeckner -Copyright (C) 2013 Marko Bencun -Copyright (C) 2014 Yuyi Chao -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - - -from cffi import FFI - -ffi = FFI() - - -with open("cl_types.h", "rt") as f: - ffi.cdef(f.read()) - -if {CL_ENABLE_GL}: - with open("cl_gl_types.h") as f: - ffi.cdef(f.read()) - -with open("src/c_wrapper/wrap_cl_core.h", "rt") as f: - ffi.cdef(f.read()) - -if {CL_ENABLE_GL}: - with open("src/c_wrapper/wrap_cl_gl_core.h") as f: - ffi.cdef(f.read()) - -ffi.set_source("pyopencl._cffi", - """ - #include "wrap_cl.h" - """, - define_macros=list({EXTRA_DEFINES}.items()), - include_dirs=( - {CL_INC_DIR} + ["src/c_wrapper/"]), - library_dirs={CL_LIB_DIR}, - libraries={CL_LIBNAME}, - extra_compile_args=({CXXFLAGS}), - extra_link_args={LDFLAGS}, - source_extension=".cpp", - sources=[ - "src/c_wrapper/wrap_cl.cpp", - "src/c_wrapper/wrap_constants.cpp", - "src/c_wrapper/bitlog.cpp", - "src/c_wrapper/pyhelper.cpp", - "src/c_wrapper/platform.cpp", - "src/c_wrapper/device.cpp", - "src/c_wrapper/context.cpp", - "src/c_wrapper/command_queue.cpp", - "src/c_wrapper/event.cpp", - "src/c_wrapper/memory_object.cpp", - "src/c_wrapper/svm.cpp", - "src/c_wrapper/image.cpp", - "src/c_wrapper/gl_obj.cpp", - "src/c_wrapper/memory_map.cpp", - "src/c_wrapper/buffer.cpp", - "src/c_wrapper/sampler.cpp", - "src/c_wrapper/program.cpp", - "src/c_wrapper/kernel.cpp", - "src/c_wrapper/debug.cpp", - ] - ) - - -if __name__ == "__main__": - ffi.compile() diff --git a/cl_gl_types.h b/cl_gl_types.h deleted file mode 100644 index ea0e7e4df0dffdbb20dffeae094e70a0f6d23243..0000000000000000000000000000000000000000 --- a/cl_gl_types.h +++ /dev/null @@ -1,12 +0,0 @@ -/* cl_gl.h */ -typedef cl_uint cl_gl_object_type; -typedef cl_uint cl_gl_texture_info; -typedef cl_uint cl_gl_platform_info; -typedef struct __GLsync *cl_GLsync; -typedef cl_uint 
cl_gl_context_info; - -/* cl_egl.h */ -typedef void* CLeglImageKHR; -typedef void* CLeglDisplayKHR; -typedef void* CLeglSyncKHR; -typedef intptr_t cl_egl_image_properties_khr; diff --git a/cl_types.h b/cl_types.h deleted file mode 100644 index 5df1601343b0d2ea5540fab54b1a4c8fabdeab6e..0000000000000000000000000000000000000000 --- a/cl_types.h +++ /dev/null @@ -1,128 +0,0 @@ -/* gl.h */ -typedef unsigned int GLenum; -typedef int GLint; /* 4-byte signed */ -typedef unsigned int GLuint; /* 4-byte unsigned */ - - -/* cl.h */ -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short; -typedef uint16_t cl_ushort; -typedef int32_t cl_int; -typedef uint32_t cl_uint; -typedef int64_t cl_long; -typedef uint64_t cl_ulong; - -typedef uint16_t cl_half; -typedef float cl_float; -typedef double cl_double; - - -typedef struct _cl_platform_id * cl_platform_id; -typedef struct _cl_device_id * cl_device_id; -typedef struct _cl_context * cl_context; -typedef struct _cl_command_queue * cl_command_queue; -typedef struct _cl_mem * cl_mem; -typedef struct _cl_program * cl_program; -typedef struct _cl_kernel * cl_kernel; -typedef struct _cl_event * cl_event; -typedef struct _cl_sampler * cl_sampler; - -/* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be -the same size as the bool in kernels. 
*/ -typedef cl_uint cl_bool; -typedef cl_ulong cl_bitfield; -typedef cl_bitfield cl_device_type; -typedef cl_uint cl_platform_info; -typedef cl_uint cl_device_info; -typedef cl_bitfield cl_device_fp_config; -typedef cl_uint cl_device_mem_cache_type; -typedef cl_uint cl_device_local_mem_type; -typedef cl_bitfield cl_device_exec_capabilities; -typedef cl_bitfield cl_device_svm_capabilities; // 2.0 -typedef cl_bitfield cl_command_queue_properties; -typedef intptr_t cl_device_partition_property; -typedef cl_bitfield cl_device_affinity_domain; - -typedef intptr_t cl_context_properties; -typedef cl_uint cl_context_info; -typedef cl_uint cl_command_queue_info; -typedef cl_uint cl_channel_order; -typedef cl_uint cl_channel_type; -typedef cl_bitfield cl_mem_flags; -typedef cl_bitfield cl_svm_mem_flags; // 2.0 -typedef cl_uint cl_mem_object_type; -typedef cl_uint cl_mem_info; -typedef cl_bitfield cl_mem_migration_flags; -typedef cl_uint cl_image_info; -typedef cl_uint cl_buffer_create_type; -typedef cl_uint cl_addressing_mode; -typedef cl_uint cl_filter_mode; -typedef cl_uint cl_sampler_info; -typedef cl_bitfield cl_map_flags; -typedef intptr_t cl_pipe_properties; // 2.0 -typedef cl_uint cl_pipe_info; // 2.0 -typedef cl_uint cl_program_info; -typedef cl_uint cl_program_build_info; -typedef cl_uint cl_program_binary_type; -typedef cl_int cl_build_status; -typedef cl_uint cl_kernel_info; -typedef cl_uint cl_kernel_arg_info; -typedef cl_uint cl_kernel_arg_address_qualifier; -typedef cl_uint cl_kernel_arg_access_qualifier; -typedef cl_bitfield cl_kernel_arg_type_qualifier; -typedef cl_uint cl_kernel_work_group_info; -typedef cl_uint cl_event_info; -typedef cl_uint cl_command_type; -typedef cl_uint cl_profiling_info; -typedef cl_bitfield cl_sampler_properties; // 2.0 -typedef cl_uint cl_kernel_exec_info; // 2.0 - -typedef struct _cl_image_format { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; -} cl_image_format; - -typedef struct 
_cl_image_desc { - cl_mem_object_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t image_row_pitch; - size_t image_slice_pitch; - cl_uint num_mip_levels; - cl_uint num_samples; - cl_mem buffer; -} cl_image_desc; - -typedef struct _cl_buffer_region { - size_t origin; - size_t size; -} cl_buffer_region; - -/* cl_ext.h */ - -typedef union -{ - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; -} cl_device_topology_amd; - -/* -typedef cl_ulong cl_device_partition_property_ext; -typedef cl_uint cl_image_pitch_info_qcom; -typedef struct _cl_mem_ext_host_ptr { - cl_uint allocation_type; - cl_uint host_cache_policy; -} cl_mem_ext_host_ptr; -typedef struct _cl_mem_ion_host_ptr { - cl_mem_ext_host_ptr ext_host_ptr; - int ion_filedesc; - void* ion_hostptr; -} cl_mem_ion_host_ptr; - -typedef cl_bitfield cl_mem_migration_flags_ext; -*/ diff --git a/doc/index.rst b/doc/index.rst index f771ab25332ba378c42ae5b403e47e267009af2d..d715d1a2e945f2e7d9e717bfd5add37c244a093a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -108,6 +108,7 @@ Contents runtime_program runtime_gl array + types algorithm howto tools diff --git a/doc/make_constants.py b/doc/make_constants.py index c9de4cd87fef509b0bbd5c6ef7e2036af77eb6c4..9ab78ad070ec6d0cc419458335a75ed44f9c9a16 100644 --- a/doc/make_constants.py +++ b/doc/make_constants.py @@ -335,7 +335,6 @@ const_ext_lookup = { "WRITE_BUFFER_RECT": cl_11, "COPY_BUFFER_RECT": cl_11, "USER": cl_11, - "MIGRATE_MEM_OBJECT_EXT": ("cl_ext_migrate_memobject", "2011.2"), "BARRIER": cl_12, "MIGRATE_MEM_OBJECTS": cl_12, "FILL_BUFFER": cl_12, @@ -408,9 +407,6 @@ const_ext_lookup = { "CONTENT_UNDEFINED": cl_12, }, - cl.migrate_mem_object_flags_ext: { - "HOST": ("cl_ext_migrate_memobject", "2011.2"), - }, } try: gl_ci = cl.gl_context_info diff --git a/doc/misc.rst b/doc/misc.rst index 
7459f89f525cd9d178f3b3814aac0fc3f9ddd527..b4e30db755258400525464fdfd4b88d2e082582d 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -121,6 +121,8 @@ checking `this file Note that the triple-quoted strings containing the source must start with `"""//CL// ..."""`. +.. _ipython-integration: + IPython integration ------------------- diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index 75b60253d3d5c267e4d70707a8714710de2eaae0..a4ad2d5f22b081baa123567f90044dd80089f813 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -43,15 +43,6 @@ Memory Migration Only available with CL 1.2. -.. function:: enqueue_migrate_mem_object_ext(queue, mem_objects, flags=0, wait_for=None) - - :param flags: from :class:`migrate_mem_object_flags_ext` - - .. versionadded:: 2011.2 - - Only available with the `cl_ext_migrate_memobject` - extension. - Buffer ------ @@ -361,13 +352,28 @@ Mapping Memory into Host Address Space Samplers -------- -.. class:: Sampler(context, normalized_coords, addressing_mode, filter_mode) +.. class:: Sampler + + + .. method:: __init__(context, normalized_coords, addressing_mode, filter_mode) + + *normalized_coords* is a :class:`bool` indicating whether + to use coordinates between 0 and 1 (*True*) or the texture's + natural pixel size (*False*). + See :class:`addressing_mode` and :class:`filter_mode` for possible + argument values. + + .. method:: __init__(context, properties) + + :arg properties: a sequence + of keys and values from :class:`sampler_properties` as accepted + by :c:func:`clCreateSamplerWithProperties` (see the OpenCL + spec for details). The trailing *0* is added automatically + and does not need to be included. + + Requires OpenCL 2 or newer. - *normalized_coords* is a :class:`bool` indicating whether - to use coordinates between 0 and 1 (*True*) or the texture's - natural pixel size (*False*). - See :class:`addressing_mode` and :class:`filter_mode` for possible - argument values. + .. versionadded:: 2018.2 .. 
attribute:: info diff --git a/doc/runtime_queue.rst b/doc/runtime_queue.rst index b4567953c889acdaafd9c49bc8d04d2de745bbc5..c0b42897d151cd95c1289665e4f5a00d801fc078 100644 --- a/doc/runtime_queue.rst +++ b/doc/runtime_queue.rst @@ -13,20 +13,35 @@ Command Queue Create a new command queue. *properties* is a bit field consisting of :class:`command_queue_properties` values. - if *device* is None, one of the devices in *context* is chosen + If *device* is None, one of the devices in *context* is chosen in an implementation-defined manner. + *properties* may be a bitwise combination of values from + :class:`queue_properties` (or *None* which is equivalent to + passing *0*). This is compatible with both OpenCL 1.x and 2.x. + + For OpenCL 2.0 and above, *properties* may also be a sequence + of keys and values from :class:`queue_properties` as accepted + by :c:func:`clCreateCommandQueueWithProperties` (see the OpenCL + spec for details). The trailing *0* is added automatically + and does not need to be included. + A :class:`CommandQueue` may be used as a context manager, like this:: with cl.CommandQueue(self.cl_context) as queue: enqueue_stuff(queue, ...) - :meth:`finish` is automatically called at the end of the context. + :meth:`finish` is automatically called at the end of the ``with``-delimited + context. .. versionadded:: 2013.1 Context manager capability. + .. versionchanged:: 2018.2 + + Added the sequence-of-properties interface for OpenCL 2. + .. attribute:: info Lower case versions of the :class:`command_queue_info` constants diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 539349755d4a939baa858ac2ca0c0449f8a8d0e1..d0904c29f54e66cdb403233fd0c984c685d99c5f 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -26,7 +26,7 @@ THE SOFTWARE. 
import re import six -from six.moves import input +from six.moves import input, intern from pyopencl.version import VERSION, VERSION_STATUS, VERSION_TEXT # noqa @@ -34,7 +34,7 @@ import logging logger = logging.getLogger(__name__) try: - import pyopencl.cffi_cl as _cl + import pyopencl._cl as _cl except ImportError: import os from os.path import dirname, join, realpath @@ -46,7 +46,12 @@ except ImportError: import numpy as np -from pyopencl.cffi_cl import ( # noqa +import sys + +_PYPY = '__pypy__' in sys.builtin_module_names +_CPY2 = not _PYPY and sys.version_info < (3,) + +from pyopencl._cl import ( # noqa get_cl_header_version, program_kind, status_code, @@ -94,18 +99,13 @@ from pyopencl.cffi_cl import ( # noqa command_execution_status, profiling_info, mem_migration_flags, - mem_migration_flags_ext, device_partition_property, device_affinity_domain, - gl_object_type, - gl_texture_info, - migrate_mem_object_flags_ext, Error, MemoryError, LogicError, RuntimeError, Platform, get_platforms, - unload_platform_compiler, Device, Context, @@ -115,30 +115,18 @@ from pyopencl.cffi_cl import ( # noqa MemoryObject, MemoryMap, Buffer, - SVMAllocation, - SVM, - SVMMap, - CompilerWarning, _Program, Kernel, Event, wait_for_events, NannyEvent, - UserEvent, enqueue_nd_range_kernel, - enqueue_task, - _enqueue_marker_with_wait_list, _enqueue_marker, - _enqueue_barrier_with_wait_list, - - enqueue_migrate_mem_objects, - enqueue_migrate_mem_object_ext, - _enqueue_barrier_with_wait_list, _enqueue_read_buffer, _enqueue_write_buffer, _enqueue_copy_buffer, @@ -146,49 +134,106 @@ from pyopencl.cffi_cl import ( # noqa _enqueue_write_buffer_rect, _enqueue_copy_buffer_rect, - enqueue_map_buffer, - _enqueue_fill_buffer, _enqueue_read_image, _enqueue_copy_image, _enqueue_write_image, - enqueue_map_image, - enqueue_fill_image, _enqueue_copy_image_to_buffer, _enqueue_copy_buffer_to_image, - enqueue_svm_memfill, - enqueue_svm_migratemem, have_gl, - _GLObject, - GLBuffer, - GLRenderBuffer, 
ImageFormat, get_supported_image_formats, - ImageDescriptor, Image, Sampler, - GLTexture, DeviceTopologyAmd, + ) - add_get_info_attrs as _add_get_info_attrs, +if not _PYPY: + # FIXME: Add back to default set when pypy support catches up + from pyopencl._cl import ( # noqa + enqueue_map_buffer, + enqueue_map_image, + ) + +if get_cl_header_version() >= (1, 1): + from pyopencl._cl import ( # noqa + UserEvent, + ) +if get_cl_header_version() >= (1, 2): + from pyopencl._cl import ( # noqa + _enqueue_marker_with_wait_list, + _enqueue_barrier_with_wait_list, + + unload_platform_compiler, + + + enqueue_migrate_mem_objects, + _enqueue_fill_buffer, + enqueue_fill_image, + + ImageDescriptor, + ) + +if get_cl_header_version() >= (2, 0): + from pyopencl._cl import ( # noqa + SVMAllocation, + SVM, + + # FIXME + #enqueue_svm_migratemem, ) if _cl.have_gl(): + from pyopencl._cl import ( # noqa + gl_object_type, + gl_texture_info, + + GLBuffer, + GLRenderBuffer, + GLTexture, + ) + try: - from pyopencl.cffi_cl import get_apple_cgl_share_group # noqa + from pyopencl._cl import get_apple_cgl_share_group # noqa except ImportError: pass try: - from pyopencl.cffi_cl import ( # noqa + from pyopencl._cl import ( # noqa enqueue_acquire_gl_objects, enqueue_release_gl_objects, ) except ImportError: pass +import inspect as _inspect + +CONSTANT_CLASSES = tuple( + getattr(_cl, name) for name in dir(_cl) + if _inspect.isclass(getattr(_cl, name)) + and name[0].islower() and name not in ["zip", "map", "range"]) + + +# {{{ diagnostics + +class CompilerWarning(UserWarning): + pass + + +def compiler_output(text): + import os + from warnings import warn + if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): + warn(text, CompilerWarning) + else: + warn("Non-empty compiler output encountered. 
Set the " + "environment variable PYOPENCL_COMPILER_OUTPUT=1 " + "to see more.", CompilerWarning) + +# }}} + # {{{ find pyopencl shipped source code @@ -310,8 +355,8 @@ class Program(object): return self._get_prg().int_ptr int_ptr = property(int_ptr, doc=_cl._Program.int_ptr.__doc__) - def from_int_ptr(int_ptr_value): - return Program(_cl._Program.from_int_ptr(int_ptr_value)) + def from_int_ptr(int_ptr_value, retain=True): + return Program(_cl._Program.from_int_ptr(int_ptr_value, retain)) from_int_ptr.__doc__ = _cl._Program.from_int_ptr.__doc__ from_int_ptr = staticmethod(from_int_ptr) @@ -478,7 +523,7 @@ class Program(object): try: return build_func() except _cl.RuntimeError as e: - msg = e.what + msg = str(e) if options_bytes: msg = msg + "\n(options: %s)" % options_bytes.decode("utf-8") @@ -496,7 +541,7 @@ class Program(object): routine = e.routine err = _cl.RuntimeError( - _cl.Error._ErrorRecord( + _cl._ErrorRecord( msg=msg, code=code, routine=routine)) @@ -522,9 +567,6 @@ class Program(object): return hash(self._get_prg()) -_add_get_info_attrs(Program, Program.get_info, program_info) - - def create_program_with_built_in_kernels(context, devices, kernel_names): if not isinstance(kernel_names, str): kernel_names = ":".join(kernel_names) @@ -540,9 +582,750 @@ def link_program(context, programs, options=[], devices=None): # }}} +# {{{ monkeypatch C++ wrappers to add functionality + +def _add_functionality(): + def generic_get_cl_version(self): + import re + version_string = self.version + match = re.match(r"^OpenCL ([0-9]+)\.([0-9]+) .*$", version_string) + if match is None: + raise RuntimeError("%s %s returned non-conformant " + "platform version string '%s'" % + (type(self).__name__, self, version_string)) + + return int(match.group(1)), int(match.group(2)) + + # {{{ Platform + + def platform_repr(self): + return "" % (self.name, self.int_ptr) + + Platform.__repr__ = platform_repr + Platform._get_cl_version = generic_get_cl_version + + # }}} + + # {{{ Device + + 
def device_repr(self): + return "" % ( + self.name.strip(), self.platform.name.strip(), self.int_ptr) + + def device_persistent_unique_id(self): + return (self.vendor, self.vendor_id, self.name, self.version) + + Device.__repr__ = device_repr + + # undocumented for now: + Device._get_cl_version = generic_get_cl_version + Device.persistent_unique_id = property(device_persistent_unique_id) + + # }}} + + # {{{ Context + + context_old_init = Context.__init__ + + def context_init(self, devices, properties, dev_type, cache_dir=None): + if cache_dir is not None: + from warnings import warn + warn("The 'cache_dir' argument to the Context constructor " + "is deprecated and no longer has an effect. " + "It was removed because it only applied to the wrapper " + "object and not the context itself, leading to inconsistencies.", + DeprecationWarning, stacklevel=2) + + context_old_init(self, devices, properties, dev_type) + + def context_repr(self): + return "" % (self.int_ptr, + ", ".join(repr(dev) for dev in self.devices)) + + def context_get_cl_version(self): + return self.devices[0].platform._get_cl_version() + + Context.__repr__ = context_repr + from pytools import memoize_method + Context._get_cl_version = memoize_method(context_get_cl_version) + + # }}} + + # {{{ CommandQueue + + def command_queue_enter(self): + return self + + def command_queue_exit(self, exc_type, exc_val, exc_tb): + self.finish() + + def command_queue_get_cl_version(self): + return self.context._get_cl_version() + + CommandQueue.__enter__ = command_queue_enter + CommandQueue.__exit__ = command_queue_exit + CommandQueue._get_cl_version = memoize_method(command_queue_get_cl_version) + + # }}} + + # {{{ _Program (the internal, non-caching version) + + def program_get_build_logs(self): + build_logs = [] + for dev in self.get_info(_cl.program_info.DEVICES): + try: + log = self.get_build_info(dev, program_build_info.LOG) + except Exception: + log = "" + + build_logs.append((dev, log)) + + return build_logs + 
+ def program_build(self, options_bytes, devices=None): + err = None + try: + self._build(options=options_bytes, devices=devices) + except Error as e: + msg = str(e) + "\n\n" + (75*"="+"\n").join( + "Build on %s:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs()) + code = e.code + routine = e.routine + + err = _cl.RuntimeError( + _cl._ErrorRecord( + msg=msg, + code=code, + routine=routine)) + + if err is not None: + # Python 3.2 outputs the whole list of currently active exceptions + # This serves to remove one (redundant) level from that nesting. + raise err + + message = (75*"="+"\n").join( + "Build on %s succeeded, but said:\n\n%s" % (dev, log) + for dev, log in self._get_build_logs() + if log is not None and log.strip()) + + if message: + if self.kind() == program_kind.SOURCE: + build_type = "From-source build" + elif self.kind() == program_kind.BINARY: + build_type = "From-binary build" + else: + build_type = "Build" + + compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" + % (build_type, message)) + + return self + + _cl._Program._get_build_logs = program_get_build_logs + _cl._Program.build = program_build + + # }}} + + # {{{ Event + class ProfilingInfoGetter: + def __init__(self, event): + self.event = event + + def __getattr__(self, name): + info_cls = _cl.profiling_info + + try: + inf_attr = getattr(info_cls, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_profiling_info(inf_attr) + + _cl.Event.profile = property(ProfilingInfoGetter) + + # }}} + + # {{{ Kernel + + kernel_old_init = Kernel.__init__ + kernel_old_get_info = Kernel.get_info + kernel_old_get_work_group_info = Kernel.get_work_group_info + + def kernel_init(self, prg, name): + if not isinstance(prg, _cl._Program): + prg = prg._get_prg() + + kernel_old_init(self, prg, name) + + self._setup(prg) + + def kernel__setup(self, prg): + self._source = getattr(prg, "_source", None) + 
+ + from pyopencl.invoker import generate_enqueue_and_set_args + self._enqueue, self._set_args = generate_enqueue_and_set_args( + self.function_name, self.num_args, self.num_args, + None, + warn_about_arg_count_bug=None, + work_around_arg_count_bug=None) + + self._wg_info_cache = {} + return self + + def kernel_set_scalar_arg_dtypes(self, scalar_arg_dtypes): + self._scalar_arg_dtypes = tuple(scalar_arg_dtypes) + + # {{{ arg counting bug handling + + # For example: + # https://github.com/pocl/pocl/issues/197 + # (but Apple CPU has a similar bug) + + work_around_arg_count_bug = False + warn_about_arg_count_bug = False + + from pyopencl.characterize import has_struct_arg_count_bug + + count_bug_per_dev = [ + has_struct_arg_count_bug(dev, self.context) + for dev in self.context.devices] + + from pytools import single_valued + if any(count_bug_per_dev): + if all(count_bug_per_dev): + work_around_arg_count_bug = single_valued(count_bug_per_dev) + else: + warn_about_arg_count_bug = True + + # }}} + + from pyopencl.invoker import generate_enqueue_and_set_args + self._enqueue, self._set_args = generate_enqueue_and_set_args( + self.function_name, + len(scalar_arg_dtypes), self.num_args, + self._scalar_arg_dtypes, + warn_about_arg_count_bug=warn_about_arg_count_bug, + work_around_arg_count_bug=work_around_arg_count_bug) + + def kernel_get_work_group_info(self, param, device): + try: + return self._wg_info_cache[param, device] + except KeyError: + pass + + result = kernel_old_get_work_group_info(self, param, device) + self._wg_info_cache[param, device] = result + return result + + def kernel_set_args(self, *args, **kwargs): + # Need to duplicate the 'self' argument for dynamically generated method + return self._set_args(self, *args, **kwargs) + + def kernel_call(self, queue, global_size, local_size, *args, **kwargs): + # __call__ can't be overridden directly, so we need this + # trampoline hack. 
+ return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) + + def kernel_capture_call(self, filename, queue, global_size, local_size, + *args, **kwargs): + from pyopencl.capture_call import capture_kernel_call + capture_kernel_call(self, filename, queue, global_size, local_size, + *args, **kwargs) + + def kernel_get_info(self, param_name): + val = kernel_old_get_info(self, param_name) + + if isinstance(val, _Program): + return Program(val) + else: + return val + + Kernel.__init__ = kernel_init + Kernel._setup = kernel__setup + Kernel.get_work_group_info = kernel_get_work_group_info + Kernel.set_scalar_arg_dtypes = kernel_set_scalar_arg_dtypes + Kernel.set_args = kernel_set_args + Kernel.__call__ = kernel_call + Kernel.capture_call = kernel_capture_call + Kernel.get_info = kernel_get_info + + # }}} + + # {{{ ImageFormat + + def image_format_repr(self): + return "ImageFormat(%s, %s)" % ( + channel_order.to_string(self.channel_order, + ""), + channel_type.to_string(self.channel_data_type, + "")) + + def image_format_eq(self, other): + return (self.channel_order == other.channel_order + and self.channel_data_type == other.channel_data_type) + + def image_format_ne(self, other): + return not image_format_eq(self, other) + + def image_format_hash(self): + return hash((type(self), self.channel_order, self.channel_data_type)) + + ImageFormat.__repr__ = image_format_repr + ImageFormat.__eq__ = image_format_eq + ImageFormat.__ne__ = image_format_ne + ImageFormat.__hash__ = image_format_hash + + # }}} + + # {{{ Image + + image_old_init = Image.__init__ + + def image_init(self, context, flags, format, shape=None, pitches=None, + hostbuf=None, is_array=False, buffer=None): + + if shape is None and hostbuf is None: + raise Error("'shape' must be passed if 'hostbuf' is not given") + + if shape is None and hostbuf is not None: + shape = hostbuf.shape + + if hostbuf is not None and not \ + (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): + from 
warnings import warn + warn("'hostbuf' was passed, but no memory flags to make use of it.") + + if hostbuf is None and pitches is not None: + raise Error("'pitches' may only be given if 'hostbuf' is given") + + if context._get_cl_version() >= (1, 2) and get_cl_header_version() >= (1, 2): + if buffer is not None and is_array: + raise ValueError( + "'buffer' and 'is_array' are mutually exclusive") + + if len(shape) == 3: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 3D arrays") + elif is_array: + image_type = mem_object_type.IMAGE2D_ARRAY + else: + image_type = mem_object_type.IMAGE3D + + elif len(shape) == 2: + if buffer is not None: + raise TypeError( + "'buffer' argument is not supported for 2D arrays") + elif is_array: + image_type = mem_object_type.IMAGE1D_ARRAY + else: + image_type = mem_object_type.IMAGE2D + + elif len(shape) == 1: + if buffer is not None: + image_type = mem_object_type.IMAGE1D_BUFFER + elif is_array: + raise TypeError("array of zero-dimensional images not supported") + else: + image_type = mem_object_type.IMAGE1D + + else: + raise ValueError("images cannot have more than three dimensions") + + desc = ImageDescriptor() + + desc.image_type = image_type + desc.shape = shape # also sets desc.array_size + + if pitches is None: + desc.pitches = (0, 0) + else: + desc.pitches = pitches + + desc.num_mip_levels = 0 # per CL 1.2 spec + desc.num_samples = 0 # per CL 1.2 spec + desc.buffer = buffer + + image_old_init(self, context, flags, format, desc, hostbuf) + else: + # legacy init for CL 1.1 and older + if is_array: + raise TypeError("'is_array=True' is not supported for CL < 1.2") + # if num_mip_levels is not None: + # raise TypeError( + # "'num_mip_levels' argument is not supported for CL < 1.2") + # if num_samples is not None: + # raise TypeError( + # "'num_samples' argument is not supported for CL < 1.2") + if buffer is not None: + raise TypeError("'buffer' argument is not supported for CL < 1.2") + + 
image_old_init(self, context, flags, format, shape, + pitches, hostbuf) + + class _ImageInfoGetter: + def __init__(self, event): + from warnings import warn + warn("Image.image.attr is deprecated. " + "Use Image.attr directly, instead.") + + self.event = event + + def __getattr__(self, name): + try: + inf_attr = getattr(_cl.image_info, name.upper()) + except AttributeError: + raise AttributeError("%s has no attribute '%s'" + % (type(self), name)) + else: + return self.event.get_image_info(inf_attr) + + def image_shape(self): + if self.type == mem_object_type.IMAGE2D: + return (self.width, self.height) + elif self.type == mem_object_type.IMAGE3D: + return (self.width, self.height, self.depth) + else: + raise LogicError("only images have shapes") + + Image.__init__ = image_init + Image.image = property(_ImageInfoGetter) + Image.shape = property(image_shape) + + # }}} + + # {{{ Error + + def error_str(self): + val = self.what + try: + val.routine + except AttributeError: + return str(val) + else: + result = "" + if val.code() != status_code.SUCCESS: + result = status_code.to_string( + val.code(), "") + routine = val.routine() + if routine: + result = "%s failed: %s" % (routine, result) + what = val.what() + if what: + if result: + result += " - " + result += what + return result + + def error_code(self): + return self.args[0].code() + + def error_routine(self): + return self.args[0].routine() + + def error_what(self): + return self.args[0] + + Error.__str__ = error_str + Error.code = property(error_code) + Error.routine = property(error_routine) + Error.what = property(error_what) + + # }}} + + # {{{ MemoryMap + + def memory_map_enter(self): + return self + + def memory_map_exit(self, exc_type, exc_val, exc_tb): + self.release() + + MemoryMap.__doc__ = """ + This class may also be used as a context manager in a ``with`` statement. + The memory corresponding to this object will be unmapped when + this object is deleted or :meth:`release` is called. + + .. 
automethod:: release + """ + MemoryMap.__enter__ = memory_map_enter + MemoryMap.__exit__ = memory_map_exit + + # }}} + + # {{{ SVMAllocation + + if get_cl_header_version() >= (2, 0): + SVMAllocation.__doc__ = """An object whose lifetime is tied to an allocation of shared virtual memory. + + .. note:: + + Most likely, you will not want to use this directly, but rather + :func:`svm_empty` and related functions which allow access to this + functionality using a friendlier, more Pythonic interface. + + .. versionadded:: 2016.2 + + .. automethod:: __init__(self, ctx, size, alignment, flags=None) + .. automethod:: release + .. automethod:: enqueue_release + """ + + if get_cl_header_version() >= (2, 0): + svmallocation_old_init = SVMAllocation.__init__ + + def svmallocation_init(self, ctx, size, alignment, flags, _interface=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. + """ + svmallocation_old_init(self, ctx, size, alignment, flags) + + read_write = ( + flags & mem_flags.WRITE_ONLY != 0 + or flags & mem_flags.READ_WRITE != 0) + + _interface["data"] = ( + int(self._ptr_as_int()), not read_write) + + self.__array_interface__ = _interface + + if get_cl_header_version() >= (2, 0): + SVMAllocation.__init__ = svmallocation_init + + # }}} + + # {{{ SVM + + if get_cl_header_version() >= (2, 0): + SVM.__doc__ = """Tags an object exhibiting the Python buffer interface (such as a + :class:`numpy.ndarray`) as referring to shared virtual memory. + + Depending on the features of the OpenCL implementation, the following + types of objects may be passed to/wrapped in this type: + + * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` + for any implementation of OpenCL 2.0. 
+ + This is how coarse-grain SVM may be used from both host and device:: + + svm_ary = cl.SVM( + cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) + assert isinstance(svm_ary.mem, np.ndarray) + + with svm_ary.map_rw(queue) as ary: + ary.fill(17) # use from host + + prg.twice(queue, svm_ary.mem.shape, None, svm_ary) + + * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, + if the implementation supports fine-grained shared virtual memory. + This memory may directly be passed to a kernel:: + + ary = cl.fsvm_empty(ctx, 1000, np.float32) + assert isinstance(ary, np.ndarray) + + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host + + Observe how mapping (as needed in coarse-grain SVM) is no longer + necessary. + + * any :class:`numpy.ndarray` (or other Python object with a buffer + interface) if the implementation supports fine-grained *system* + shared virtual memory. + + This is how plain :mod:`numpy` arrays may directly be passed to a + kernel:: + + ary = np.zeros(1000, np.float32) + prg.twice(queue, ary.shape, None, cl.SVM(ary)) + queue.finish() # synchronize + print(ary) # access from host + + Objects of this type may be passed to kernel calls and + :func:`enqueue_copy`. Coarse-grain shared-memory *must* be mapped + into host address space using :meth:`map` before being accessed + through the :mod:`numpy` interface. + + .. note:: + + This object merely serves as a 'tag' that changes the behavior + of functions to which it is passed. It has no special management + relationship to the memory it tags. For example, it is permissible + to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one + :class:`SVM` instance and use the array to construct another. + Neither of the tags need to be kept alive. + + .. versionadded:: 2016.2 + + .. attribute:: mem + + The wrapped object. + + .. automethod:: __init__ + .. automethod:: map + .. automethod:: map_ro + .. automethod:: map_rw + .. 
automethod:: as_buffer + """ + + if get_cl_header_version() >= (2, 0): + svm_old_init = SVM.__init__ + + def svm_init(self, mem): + svm_old_init(self, mem) + + self.mem = mem + + def svm_map(self, queue, flags, is_blocking=True, wait_for=None): + """ + :arg is_blocking: If *False*, subsequent code must wait on + :attr:`SVMMap.event` in the returned object before accessing the + mapped memory. + :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to + read-write. + :returns: an :class:`SVMMap` instance + + |std-enqueue-blurb| + """ + return SVMMap( + self, + queue, + _cl._enqueue_svm_map(queue, is_blocking, flags, self, wait_for)) + + def svm_map_ro(self, queue, is_blocking=True, wait_for=None): + """Like :meth:`map`, but with *flags* set for a read-only map.""" + + return self.map(queue, map_flags.READ, + is_blocking=is_blocking, wait_for=wait_for) + + def svm_map_rw(self, queue, is_blocking=True, wait_for=None): + """Like :meth:`map`, but with *flags* set for a read-write map.""" + + return self.map(queue, map_flags.READ | map_flags.WRITE, + is_blocking=is_blocking, wait_for=wait_for) + + def svm__enqueue_unmap(self, queue, wait_for=None): + return _cl._enqueue_svm_unmap(queue, self, wait_for) + + def svm_as_buffer(self, ctx, flags=None): + """ + :arg ctx: a :class:`Context` + :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to + read-write. + :returns: a :class:`Buffer` corresponding to *self*. + + The memory referred to by this object must not be freed before + the returned :class:`Buffer` is released.
+ """ + + if flags is None: + flags = mem_flags.READ_WRITE + + return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) + + if get_cl_header_version() >= (2, 0): + SVM.__init__ = svm_init + SVM.map = svm_map + SVM.map_ro = svm_map_ro + SVM.map_rw = svm_map_rw + SVM._enqueue_unmap = svm__enqueue_unmap + SVM.as_buffer = svm_as_buffer + + # }}} + + # ORDER DEPENDENCY: Some of the above may override get_info, the effect needs + # to be visible through the attributes. So get_info attr creation needs to happen + # after the overriding is complete. + cls_to_info_cls = { + _cl.Platform: (_cl.Platform.get_info, _cl.platform_info, []), + _cl.Device: (_cl.Device.get_info, _cl.device_info, + ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]), + _cl.Context: (_cl.Context.get_info, _cl.context_info, []), + _cl.CommandQueue: (_cl.CommandQueue.get_info, _cl.command_queue_info, + ["CONTEXT", "DEVICE"]), + _cl.Event: (_cl.Event.get_info, _cl.event_info, []), + _cl.MemoryObjectHolder: + (MemoryObjectHolder.get_info, _cl.mem_info, []), + Image: (_cl.Image.get_image_info, _cl.image_info, []), + Program: (Program.get_info, _cl.program_info, []), + Kernel: (Kernel.get_info, _cl.kernel_info, []), + _cl.Sampler: (Sampler.get_info, _cl.sampler_info, []), + } + + def to_string(cls, value, default_format=None): + for name in dir(cls): + if (not name.startswith("_") and getattr(cls, name) == value): + return name + + if default_format is None: + raise ValueError("a name for value %d was not found in %s" + % (value, cls.__name__)) + else: + return default_format % value + + for cls in CONSTANT_CLASSES: + cls.to_string = classmethod(to_string) + + # {{{ get_info attributes ------------------------------------------------- + + def make_getinfo(info_method, info_name, info_attr): + def result(self): + return info_method(self, info_attr) + + return property(result) + + def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): + def result(self): + try: + return 
getattr(self, cache_attr) + except AttributeError: + pass + + result = info_method(self, info_attr) + setattr(self, cache_attr, result) + return result + + return property(result) + + for cls, (info_method, info_class, cacheable_attrs) \ + in six.iteritems(cls_to_info_cls): + for info_name, info_value in six.iteritems(info_class.__dict__): + if info_name == "to_string" or info_name.startswith("_"): + continue + + info_lower = info_name.lower() + info_constant = getattr(info_class, info_name) + if info_name in cacheable_attrs: + cache_attr = intern("_info_cache_"+info_lower) + setattr(cls, info_lower, make_cacheable_getinfo( + info_method, info_lower, cache_attr, info_constant)) + else: + setattr(cls, info_lower, make_getinfo( + info_method, info_name, info_constant)) + + # }}} + + if _cl.have_gl(): + def gl_object_get_gl_object(self): + return self.get_gl_object_info()[1] + + GLBuffer.gl_object = property(gl_object_get_gl_object) + GLTexture.gl_object = property(gl_object_get_gl_object) + + +_add_functionality() + +# }}} + + # {{{ create_some_context -def create_some_context(interactive=None, answers=None, cache_dir=None): +def create_some_context(interactive=None, answers=None): import os if answers is None: if "PYOPENCL_CTX" in os.environ: @@ -553,7 +1336,7 @@ def create_some_context(interactive=None, answers=None, cache_dir=None): from pyopencl.tools import get_test_platforms_and_devices for plat, devs in get_test_platforms_and_devices(): for dev in devs: - return Context([dev], cache_dir=cache_dir) + return Context([dev]) if answers is not None: pre_provided_answers = answers @@ -668,7 +1451,7 @@ def create_some_context(interactive=None, answers=None, cache_dir=None): raise RuntimeError("not all provided choices were used by " "create_some_context. 
(left over: '%s')" % ":".join(answers)) - return Context(devices, cache_dir=cache_dir) + return Context(devices) _csc = create_some_context @@ -676,46 +1459,54 @@ _csc = create_some_context # }}} -# {{{ enqueue_copy +# {{{ SVMMap -def _mark_copy_deprecated(func): - def new_func(*args, **kwargs): - from warnings import warn - warn("'%s' has been deprecated in version 2011.1. Please use " - "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning, - stacklevel=2) - return func(*args, **kwargs) +class SVMMap(object): + """ + .. attribute:: event - try: - from functools import update_wrapper - except ImportError: - pass - else: - try: - update_wrapper(new_func, func) - except AttributeError: - pass + .. versionadded:: 2016.2 + + .. automethod:: release + + This class may also be used as a context manager in a ``with`` statement. + :meth:`release` will be called upon exit from the ``with`` region. + The value returned to the ``as`` part of the context manager is the + mapped Python object (e.g. a :mod:`numpy` array). 
+ """ + def __init__(self, svm, queue, event): + self.svm = svm + self.queue = queue + self.event = event - return new_func + def __del__(self): + if self.svm is not None: + self.release() + def __enter__(self): + return self.svm.mem -enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image) -enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image) -enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image) -enqueue_copy_image_to_buffer = _mark_copy_deprecated( - _cl._enqueue_copy_image_to_buffer) -enqueue_copy_buffer_to_image = _mark_copy_deprecated( - _cl._enqueue_copy_buffer_to_image) -enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer) -enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer) -enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer) + def __exit__(self, exc_type, exc_val, exc_tb): + self.release() + def release(self, queue=None, wait_for=None): + """ + :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one + with which the map was created, if not specified. 
+ :returns: a :class:`pyopencl.Event` -if _cl.get_cl_header_version() >= (1, 1): - enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect) - enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect) - enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect) + |std-enqueue-blurb| + """ + evt = self.svm._enqueue_unmap(queue or self.queue, wait_for) + self.svm = None + + return evt + +# }}} + + +# {{{ enqueue_copy def enqueue_copy(queue, dest, src, **kwargs): """Copy from :class:`Image`, :class:`Buffer` or the host to @@ -891,12 +1682,13 @@ def enqueue_copy(queue, dest, src, **kwargs): else: raise ValueError("invalid dest mem object type") - elif isinstance(dest, SVM): + elif get_cl_header_version() >= (2, 0) and isinstance(dest, SVM): # to SVM if isinstance(src, SVM): src = src.mem return _cl._enqueue_svm_memcpy(queue, dest.mem, src, **kwargs) + else: # assume to-host @@ -1039,6 +1831,10 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): from warnings import warn warn("The context for this queue does not declare OpenCL 1.2 support, so " "the next thing you might see is a crash") + + if _PYPY and isinstance(pattern, np.generic): + pattern = np.asarray(pattern) + return _cl._enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for) # }}} @@ -1046,6 +1842,48 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): # {{{ numpy-like svm allocation +def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): + """Fill shared virtual memory with a pattern. + + :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object + :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray`) with the + fill pattern to be used. + :arg byte_count: The size of the memory to be filled. Defaults to the + entirety of *dest*. + + |std-enqueue-blurb| + + ..
versionadded:: 2016.2 + """ + + if not isinstance(dest, SVM): + dest = SVM(dest) + + return _cl._enqueue_svm_memfill( + queue, dest, pattern, byte_count=byte_count, wait_for=wait_for) + + +def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): + """ + :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` + arrays), optionally wrapped in :class:`SVM` objects. + :arg flags: a combination of :class:`mem_migration_flags` + + |std-enqueue-blurb| + + .. versionadded:: 2016.2 + + This function requires OpenCL 2.1. + """ + + return _cl._enqueue_svm_migratemem( + queue, + [svm.mem if isinstance(svm, SVM) else svm + for svm in svms], + flags, + wait_for) + + def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* and *order*. (See :func:`numpy.empty` for the meaning of these arguments.) @@ -1193,4 +2031,14 @@ def fsvm_empty_like(ctx, ary, alignment=None): # }}} + +_KERNEL_ARG_CLASSES = ( + MemoryObjectHolder, + Sampler, + LocalMemory, + ) +if get_cl_header_version() >= (2, 0): + _KERNEL_ARG_CLASSES = _KERNEL_ARG_CLASSES + (SVM,) + + # vim: foldmethod=marker diff --git a/pyopencl/array.py b/pyopencl/array.py index 704c495b4bff7ceed3d4808244a8bdb844f267b5..a4a5f4cffa57a314192878aec926f99285954b78 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -663,7 +663,8 @@ class Array(object): if ary is None: ary = np.empty(self.shape, self.dtype) - ary = _as_strided(ary, strides=self.strides) + if self.strides != ary.strides: + ary = _as_strided(ary, strides=self.strides) else: if ary.size != self.size: raise TypeError("'ary' has non-matching size") diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 22e55c404a7a5d742f8e511f04308ed437acdf8c..48b6270edcdc107b1aa006b4202feb3e6a29b36f 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -26,7 +26,7 @@ THE SOFTWARE.
import six from six.moves import zip -import pyopencl.cffi_cl as _cl +import pyopencl._cl as _cl import re import sys import os @@ -374,7 +374,7 @@ def _create_built_program_from_source_cached(ctx, src, options_bytes, if log is not None and log.strip()) if message: - from pyopencl.cffi_cl import compiler_output + from pyopencl import compiler_output compiler_output( "Built kernel retrieved from cache. Original from-source " "build had warnings:\n"+message) diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py deleted file mode 100644 index c5effc7b35dbd3e64b88b8eaa56016809797f147..0000000000000000000000000000000000000000 --- a/pyopencl/cffi_cl.py +++ /dev/null @@ -1,2954 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = """ -Copyright (C) 2013 Marko Bencun -Copyright (C) 2014 Andreas Kloeckner -Copyright (C) 2014 Yichao Yu -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - -import six -from six.moves import map, range, zip, intern - -import warnings -import numpy as np -import sys -import re - -from pytools import memoize_method - -from pyopencl._cffi import ffi as _ffi -from .compyte.array import f_contiguous_strides, c_contiguous_strides - - -from pyopencl._cffi import lib as _lib - -import logging -logger = logging.getLogger(__name__) - - -class _CLKernelArg(object): - pass - - -# {{{ hook up connections between the wrapper and the interperter - -import gc -_py_gc = _ffi.callback('int(void)')(gc.collect) - -_pyrefs = {} - - -@_ffi.callback('void(void*)') -def _py_deref(handle): - try: - del _pyrefs[handle] - except Exception: - pass - - -# return a new reference of the object pointed to by the handle. -# The return value might be different with the input (on PyPy). -# _py_deref should be called (once) when the object is not needed anymore. -@_ffi.callback('void*(void*)') -def _py_ref(handle): - obj = _ffi.from_handle(handle) - handle = _ffi.new_handle(obj) - _pyrefs[handle] = handle - return handle - - -@_ffi.callback('void(void*, cl_int)') -def _py_call(handle, status): - _ffi.from_handle(handle)(status) - - -_lib.set_py_funcs(_py_gc, _py_ref, _py_deref, _py_call) - -# }}} - - -# {{{ compatibility shims - -# are we running on pypy? 
-_PYPY = '__pypy__' in sys.builtin_module_names -_CPY2 = not _PYPY and sys.version_info < (3,) - -try: - _unicode = eval('unicode') - _ffi_pystr = _ffi.string -except Exception: - _unicode = str - _bytes = bytes - - def _ffi_pystr(s): - return _ffi.string(s).decode() if s else None -else: - try: - _bytes = bytes - except Exception: - _bytes = str - - -def _to_cstring(s): - if isinstance(s, _unicode): - return s.encode() - return s - -# }}} - - -# {{{ wrapper tools - -# {{{ _CArray helper classes - -class _CArray(object): - def __init__(self, ptr): - self.ptr = ptr - self.size = _ffi.new('uint32_t*') - - def __del__(self): - if self.ptr != _ffi.NULL: - _lib.free_pointer(self.ptr[0]) - - def __getitem__(self, key): - return self.ptr[0].__getitem__(key) - - def __iter__(self): - for i in range(self.size[0]): - yield self[i] - -# }}} - - -# {{{ GetInfo support - -def _generic_info_to_python(info): - type_ = _ffi_pystr(info.type) - - if info.free_type: - _lib.free_pointer(info.type) - - value = _ffi.cast(type_, info.value) - - if info.opaque_class != _lib.CLASS_NONE: - klass = { - _lib.CLASS_PLATFORM: Platform, - _lib.CLASS_DEVICE: Device, - _lib.CLASS_KERNEL: Kernel, - _lib.CLASS_CONTEXT: Context, - _lib.CLASS_BUFFER: Buffer, - _lib.CLASS_PROGRAM: _Program, - _lib.CLASS_EVENT: Event, - _lib.CLASS_COMMAND_QUEUE: CommandQueue - }[info.opaque_class] - - if klass is _Program: - def create_inst(val): - from pyopencl import Program - return Program(_Program._create(val)) - - else: - create_inst = klass._create - - if type_.endswith(']'): - ret = list(map(create_inst, value)) - _lib.free_pointer(info.value) - return ret - else: - return create_inst(value) - - if type_ == 'char*': - ret = _ffi_pystr(value) - elif type_ == 'cl_device_topology_amd*': - ret = DeviceTopologyAmd( - value.pcie.bus, value.pcie.device, value.pcie.function) - elif type_ == 'cl_image_format*': - ret = ImageFormat(value.image_channel_order, - value.image_channel_data_type) - elif 
type_.startswith('char*['): - ret = list(map(_ffi_pystr, value)) - _lib.free_pointer_array(info.value, len(value)) - elif type_.endswith(']'): - if type_.startswith('char['): - # This is usually a CL binary, which may contain NUL characters - # that should be preserved. - ret = _bytes(_ffi.buffer(value)) - - elif type_.startswith('generic_info['): - ret = list(map(_generic_info_to_python, value)) - elif type_.startswith('cl_image_format['): - ret = [ImageFormat(imf.image_channel_order, - imf.image_channel_data_type) - for imf in value] - else: - ret = list(value) - else: - ret = value[0] - if info.free_value: - _lib.free_pointer(info.value) - return ret - -# }}} - - -def _clobj_list(objs): - if objs is None: - return _ffi.NULL, 0 - return [ev.ptr for ev in objs], len(objs) - - -# {{{ common base class - -class _Common(object): - @classmethod - def _create(cls, ptr): - self = cls.__new__(cls) - self.ptr = ptr - return self - ptr = _ffi.NULL - - # {{{ cleanup - - # The module-global _lib variable may get set to None during interpreter - # cleanup before we're done cleaning up CL objects. (Symbols starting with - # an underscore even get cleared first [1]--although it's unclear that that - # really matters.) To retain our ability to clean up objects, retain a - # reference to the _lib module. 
- # - # [1] https://www.python.org/doc/essays/cleanup/ - - _retained_lib = _lib - - def __del__(self): - self._retained_lib.clobj__delete(self.ptr) - - # }}} - - def __eq__(self, other): - return other.int_ptr == self.int_ptr - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return _lib.clobj__int_ptr(self.ptr) - - def get_info(self, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.clobj__get_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - @property - def int_ptr(self): - return _lib.clobj__int_ptr(self.ptr) - - @classmethod - def from_int_ptr(cls, int_ptr_value, retain=True): - """Constructs a :mod:`pyopencl` handle from a C-level pointer (given as - the integer *int_ptr_value*). If *retain* is *True* (the default) - :mod:`pyopencl` will call ``clRetainXXX`` on the provided object. If - the previous owner of the object will *not* release the reference, - *retain* should be set to *False*, to effectively transfer ownership to - :mod:`pyopencl`. - - .. versionchanged:: 2016.1 - - *retain* added - """ - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.clobj__from_int_ptr( - ptr, int_ptr_value, getattr(_lib, 'CLASS_%s' % cls._id.upper()), - retain)) - return cls._create(ptr[0]) - -# }}} - -# }}} - - -def get_cl_header_version(): - v = _lib.get_cl_version() - return (v >> (3 * 4), - (v >> (1 * 4)) & 0xff) - - -# {{{ constants - -_constants = {} - - -# {{{ constant classes - -class _ConstantsNamespace(object): - def __init__(self): - raise RuntimeError("This class cannot be instantiated.") - - @classmethod - def to_string(cls, value, default_format=None): - for name in dir(cls): - if (not name.startswith("_") and getattr(cls, name) == value): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - -# /!\ If you add anything here, add it to pyopencl/__init__.py as well. 
- -class program_kind(_ConstantsNamespace): # noqa - pass - - -class status_code(_ConstantsNamespace): # noqa - pass - - -class platform_info(_ConstantsNamespace): # noqa - pass - - -class device_type(_ConstantsNamespace): # noqa - @classmethod - def to_string(cls, value, default_format=None): - for name in dir(cls): - if name in ("DEFAULT", "ALL"): - continue - if not name.startswith("_"): - bitfield = getattr(cls, name) - if (isinstance(bitfield, six.integer_types) - and ((bitfield & value) == bitfield)): - return name - - if default_format is None: - raise ValueError("a name for value %d was not found in %s" - % (value, cls.__name__)) - else: - return default_format % value - - -class device_info(_ConstantsNamespace): # noqa - pass - - -class device_fp_config(_ConstantsNamespace): # noqa - pass - - -class device_mem_cache_type(_ConstantsNamespace): # noqa - pass - - -class device_local_mem_type(_ConstantsNamespace): # noqa - pass - - -class device_exec_capabilities(_ConstantsNamespace): # noqa - pass - - -class device_svm_capabilities(_ConstantsNamespace): # noqa - pass - - -class command_queue_properties(_ConstantsNamespace): # noqa - pass - - -class context_info(_ConstantsNamespace): # noqa - pass - - -class gl_context_info(_ConstantsNamespace): # noqa - pass - - -class context_properties(_ConstantsNamespace): # noqa - pass - - -class command_queue_info(_ConstantsNamespace): # noqa - pass - - -class queue_properties(_ConstantsNamespace): # noqa - pass - - -class mem_flags(_ConstantsNamespace): # noqa - @classmethod - def _writable(cls, flags): - return flags & (cls.READ_WRITE | cls.WRITE_ONLY) - - @classmethod - def _hold_host(cls, flags): - return flags & cls.USE_HOST_PTR - - @classmethod - def _use_host(cls, flags): - return flags & (cls.USE_HOST_PTR | cls.COPY_HOST_PTR) - - @classmethod - def _host_writable(cls, flags): - return cls._writable(flags) and cls._hold_host(flags) - - -class svm_mem_flags(_ConstantsNamespace): # noqa - pass - - -class 
channel_order(_ConstantsNamespace): # noqa - pass - - -class channel_type(_ConstantsNamespace): # noqa - pass - - -class mem_object_type(_ConstantsNamespace): # noqa - pass - - -class mem_info(_ConstantsNamespace): # noqa - pass - - -class image_info(_ConstantsNamespace): # noqa - pass - - -class addressing_mode(_ConstantsNamespace): # noqa - pass - - -class filter_mode(_ConstantsNamespace): # noqa - pass - - -class sampler_info(_ConstantsNamespace): # noqa - pass - - -class map_flags(_ConstantsNamespace): # noqa - pass - - -class program_info(_ConstantsNamespace): # noqa - pass - - -class program_build_info(_ConstantsNamespace): # noqa - pass - - -class program_binary_type(_ConstantsNamespace): # noqa - pass - - -class kernel_info(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_info(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_address_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_access_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_arg_type_qualifier(_ConstantsNamespace): # noqa - pass - - -class kernel_work_group_info(_ConstantsNamespace): # noqa - pass - - -class event_info(_ConstantsNamespace): # noqa - pass - - -class command_type(_ConstantsNamespace): # noqa - pass - - -class command_execution_status(_ConstantsNamespace): # noqa - pass - - -class profiling_info(_ConstantsNamespace): # noqa - pass - - -class mem_migration_flags(_ConstantsNamespace): # noqa - pass - - -class mem_migration_flags_ext(_ConstantsNamespace): # noqa - pass - - -class device_partition_property(_ConstantsNamespace): # noqa - pass - - -class device_affinity_domain(_ConstantsNamespace): # noqa - pass - - -class gl_object_type(_ConstantsNamespace): # noqa - pass - - -class gl_texture_info(_ConstantsNamespace): # noqa - pass - - -class migrate_mem_object_flags_ext(_ConstantsNamespace): # noqa - pass - -# }}} - - -_locals = locals() - - -# TODO: constant values are cl_ulong -@_ffi.callback('void (*)(const char*, const char* 
name, int64_t value)') -def _constant_callback(type_, name, value): - setattr(_locals[_ffi_pystr(type_)], _ffi_pystr(name), value) # noqa - - -_lib.populate_constants(_constant_callback) - -del _locals -del _constant_callback - -# }}} - - -# {{{ exceptions - -class Error(Exception): - class _ErrorRecord(object): - __slots__ = ('_routine', '_code', '_what') - - def __init__(self, msg='', code=0, routine=''): - self._routine = routine - assert isinstance(code, six.integer_types) - self._code = code - self._what = msg - - def routine(self): - return self._routine - - def code(self): - return self._code - - def what(self): - return self._what - - def __init__(self, *a, **kw): - if len(a) == 1 and not kw and hasattr(a[0], 'what'): - super(Error, self).__init__(a[0]) - else: - super(Error, self).__init__(self._ErrorRecord(*a, **kw)) - - def __str__(self): - val = self.args[0] - try: - val.routine - except AttributeError: - return str(val) - else: - result = "" - if val.code() != status_code.SUCCESS: - result = status_code.to_string( - val.code(), "") - routine = val.routine() - if routine: - result = "%s failed: %s" % (routine, result) - what = val.what() - if what: - if result: - result += " - " - result += what - return result - - @property - def code(self): - return self.args[0].code() - - @property - def routine(self): - return self.args[0].routine() - - @property - def what(self): - return self.args[0].what() - - def is_out_of_memory(self): - # matches C implementation in src/c_wrapper/error.h - val = self.args[0] - - return (val.code == status_code.MEM_OBJECT_ALLOCATION_FAILURE - or val.code == status_code.OUT_OF_RESOURCES - or val.code == status_code.OUT_OF_HOST_MEMORY) - - -class MemoryError(Error): - pass - - -class LogicError(Error): - pass - - -_py_RuntimeError = RuntimeError - - -class RuntimeError(Error): - pass - - -def _handle_error(error): - if error == _ffi.NULL: - return - if error.other == 1: - # non-pyopencl exceptions are handled here - e = 
_py_RuntimeError(_ffi_pystr(error.msg)) - _lib.free_pointer(error.msg) - _lib.free_pointer(error) - raise e - if error.code == status_code.MEM_OBJECT_ALLOCATION_FAILURE: - klass = MemoryError - elif error.code <= status_code.INVALID_VALUE: - klass = LogicError - elif status_code.INVALID_VALUE < error.code < status_code.SUCCESS: - klass = RuntimeError - else: - klass = Error - - e = klass(routine=_ffi_pystr(error.routine), - code=error.code, msg=_ffi_pystr(error.msg)) - _lib.free_pointer(error.routine) - _lib.free_pointer(error.msg) - _lib.free_pointer(error) - raise e - -# }}} - - -# {{{ Platform - -class Platform(_Common): - _id = 'platform' - - def get_devices(self, device_type=device_type.ALL): - devices = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.platform__get_devices( - self.ptr, devices.ptr, devices.size, device_type)) - return [Device._create(devices.ptr[0][i]) - for i in range(devices.size[0])] - - def __repr__(self): - return "" % (self.name, self.int_ptr) - - -def unload_platform_compiler(plat): - _handle_error(_lib.platform__unload_compiler(plat.ptr)) - - -def get_platforms(): - platforms = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.get_platforms(platforms.ptr, platforms.size)) - return [Platform._create(platforms.ptr[0][i]) - for i in range(platforms.size[0])] - -# }}} - - -# {{{ Device - -class Device(_Common): - _id = 'device' - - def create_sub_devices(self, props): - props = tuple(props) + (0,) - devices = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.device__create_sub_devices( - self.ptr, devices.ptr, devices.size, props)) - return [Device._create(devices.ptr[0][i]) - for i in range(devices.size[0])] - - def __repr__(self): - return "" % ( - self.name.strip(), self.platform.name.strip(), self.int_ptr) - - @property - def persistent_unique_id(self): - return (self.vendor, self.vendor_id, self.name, self.version) - -# }}} - - -# {{{ {Device,Platform}._get_cl_version - -_OPENCL_VERSION_STRING_RE = re.compile(r"^OpenCL 
([0-9]+)\.([0-9]+) .*$") - - -def _platdev_get_cl_version(self): - version_string = self.version - match = _OPENCL_VERSION_STRING_RE.match(version_string) - if match is None: - raise RuntimeError("platform %s returned non-conformant " - "platform version string '%s'" % - (self, version_string)) - - return int(match.group(1)), int(match.group(2)) - - -Platform._get_cl_version = _platdev_get_cl_version -Device._get_cl_version = _platdev_get_cl_version - -# }}} - - -# {{{ Context - -def _parse_context_properties(properties): - if properties is None: - return _ffi.NULL - - props = [] - for prop_tuple in properties: - if len(prop_tuple) != 2: - raise RuntimeError("property tuple must have length 2", - status_code.INVALID_VALUE, "Context") - - prop, value = prop_tuple - if prop is None: - raise RuntimeError("invalid context property", - status_code.INVALID_VALUE, "Context") - - props.append(prop) - if prop == context_properties.PLATFORM: - props.append(value.int_ptr) - - elif prop == getattr(context_properties, "WGL_HDC_KHR", None): - props.append(ctypes.c_ssize_t(value).value) - - elif prop in [getattr(context_properties, key, None) for key in ( - 'CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE', - 'GL_CONTEXT_KHR', - 'EGL_DISPLAY_KHR', - 'GLX_DISPLAY_KHR', - 'CGL_SHAREGROUP_KHR', - )]: - - from ctypes import _Pointer, cast - if isinstance(value, _Pointer): - val = cast(value, ctypes.c_void_p).value - else: - val = int(value) - - if not val: - raise LogicError("You most likely have not initialized " - "OpenGL properly.", - status_code.INVALID_VALUE, "Context") - props.append(val) - else: - raise RuntimeError("invalid context property", - status_code.INVALID_VALUE, "Context") - props.append(0) - return props - - -class Context(_Common): - _id = 'context' - - def __init__(self, devices=None, properties=None, dev_type=None, cache_dir=None): - c_props = _parse_context_properties(properties) - status_code = _ffi.new('cl_int*') - - _ctx = _ffi.new('clobj_t*') - if devices is not 
None: - # from device list - if dev_type is not None: - raise RuntimeError("one of 'devices' or 'dev_type' " - "must be None", - status_code.INVALID_VALUE, "Context") - _devices, num_devices = _clobj_list(devices) - # TODO parameter order? (for clobj_list) - _handle_error(_lib.create_context(_ctx, c_props, - num_devices, _devices)) - - else: - # from device type - if dev_type is None: - dev_type = device_type.DEFAULT - _handle_error(_lib.create_context_from_type(_ctx, c_props, - dev_type)) - - self.ptr = _ctx[0] - self.cache_dir = cache_dir - - def __repr__(self): - return "" % (self.int_ptr, - ", ".join(repr(dev) for dev in self.devices)) - - @memoize_method - def _get_cl_version(self): - return self.devices[0].platform._get_cl_version() - -# }}} - - -# {{{ CommandQueue - -class CommandQueue(_Common): - _id = 'command_queue' - - def __init__(self, context, device=None, properties=None): - if properties is None: - properties = 0 - - ptr_command_queue = _ffi.new('clobj_t*') - - _handle_error(_lib.create_command_queue( - ptr_command_queue, context.ptr, - _ffi.NULL if device is None else device.ptr, properties)) - - self.ptr = ptr_command_queue[0] - - def finish(self): - _handle_error(_lib.command_queue__finish(self.ptr)) - - def flush(self): - _handle_error(_lib.command_queue__flush(self.ptr)) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.finish() - - def _get_cl_version(self): - return self.device._get_cl_version() - - -# }}} - - -# {{{ _norm_shape_dtype and cffi_array - -def _norm_shape_dtype(shape, dtype, order="C", strides=None, name=""): - dtype = np.dtype(dtype) - if not isinstance(shape, tuple): - try: - shape = tuple(shape) - except Exception: - shape = (shape,) - if strides is None: - if order in "cC": - strides = c_contiguous_strides(dtype.itemsize, shape) - elif order in "fF": - strides = f_contiguous_strides(dtype.itemsize, shape) - else: - raise RuntimeError("unrecognized order specifier %s" % order, - 
status_code.INVALID_VALUE, name) - return dtype, shape, strides - - -class cffi_array(np.ndarray): # noqa - __array_priority__ = -100.0 - - def __new__(cls, buf, shape, dtype, strides, base=None): - self = np.ndarray.__new__(cls, shape, dtype=dtype, - buffer=buf, strides=strides) - if base is None: - base = buf - self.__base = base - return self - - @property - def base(self): - return self.__base - -# }}} - - -# {{{ MemoryObjectHolder base class - -class MemoryObjectHolder(_Common, _CLKernelArg): - def get_host_array(self, shape, dtype, order="C"): - dtype, shape, strides = _norm_shape_dtype( - shape, dtype, order, None, 'MemoryObjectHolder.get_host_array') - _hostptr = _ffi.new('void**') - _size = _ffi.new('size_t*') - _handle_error(_lib.memory_object__get_host_array(self.ptr, _hostptr, - _size)) - ary = cffi_array(_ffi.buffer(_hostptr[0], _size[0]), shape, - dtype, strides, self) - if ary.nbytes > _size[0]: - raise LogicError("Resulting array is larger than memory object.", - status_code.INVALID_VALUE, - "MemoryObjectHolder.get_host_array") - return ary - -# }}} - - -# {{{ MemoryObject - -class MemoryObject(MemoryObjectHolder): - def __init__(self, hostbuf=None): - self.__hostbuf = hostbuf - - def _handle_buf_flags(self, flags): - if self.__hostbuf is None: - return _ffi.NULL, 0, None - if not mem_flags._use_host(flags): - warnings.warn("'hostbuf' was passed, but no memory flags " - "to make use of it.") - - need_retain = mem_flags._hold_host(flags) - c_hostbuf, hostbuf_size, retained_buf = _c_buffer_from_obj( - self.__hostbuf, writable=mem_flags._host_writable(flags), - retain=need_retain) - if need_retain: - self.__retained_buf = retained_buf - return c_hostbuf, hostbuf_size, retained_buf - - @property - def hostbuf(self): - return self.__hostbuf - - def release(self): - _handle_error(_lib.memory_object__release(self.ptr)) - -# }}} - - -# {{{ MemoryMap - -class MemoryMap(_Common): - """ - This class may also be used as a context manager in a ``with`` 
statement. - The memory corresponding to this object will be unmapped when - this object is deleted or :meth:`release` is called. - - .. automethod:: release - """ - - @classmethod - def _create(cls, ptr, shape, typestr, strides): - self = _Common._create.__func__(cls, ptr) - self.__array_interface__ = { - 'shape': shape, - 'typestr': typestr, - 'strides': strides, - 'data': (int(_lib.clobj__int_ptr(self.ptr)), False), - 'version': 3 - } - return self - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.release() - - def release(self, queue=None, wait_for=None): - c_wait_for, num_wait_for = _clobj_list(wait_for) - _event = _ffi.new('clobj_t*') - _handle_error(_lib.memory_map__release( - self.ptr, queue.ptr if queue is not None else _ffi.NULL, - c_wait_for, num_wait_for, _event)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _c_buffer_from_obj - -if _PYPY: - # Convert a Python object to a tuple (ptr, num_bytes, ref) to be able to - # pass a data stream to a C function where @ptr can be passed to a pointer - # argument and @num_bytes is the number of bytes. For certain types or - # when @writable or @retain is True, @ref is the object which keep the - # pointer converted from @ptr object valid. 
- - def _c_buffer_from_obj(obj, writable=False, retain=False): - if isinstance(obj, bytes): - if writable: - # bytes is not writable - raise TypeError('expected an object with a writable ' - 'buffer interface.') - if retain: - buf = _ffi.new('char[]', obj) - return (buf, len(obj), buf) - return (obj, len(obj), obj) - elif isinstance(obj, np.ndarray): - # numpy array - return (_ffi.cast('void*', obj.__array_interface__['data'][0]), - obj.nbytes, obj) - elif isinstance(obj, np.generic): - if writable or retain: - raise TypeError('expected an object with a writable ' - 'buffer interface.') - - return (_ffi.cast('void*', memoryview(obj)._pypy_raw_address()), - obj.itemsize, obj) - else: - raise LogicError("PyOpencl on PyPy only accepts numpy arrays " - "and scalars arguments", status_code.INVALID_VALUE) - -elif sys.version_info >= (2, 7, 4): - import ctypes - try: - # Python 2.6 doesn't have this. - _ssize_t = ctypes.c_ssize_t - except AttributeError: - _ssize_t = ctypes.c_size_t - - def _c_buffer_from_obj(obj, writable=False, retain=False): - # {{{ try the numpy array interface first - - # avoid slow ctypes-based buffer interface wrapper - - ary_intf = getattr(obj, "__array_interface__", None) - if ary_intf is not None: - buf_base, is_read_only = ary_intf["data"] - return ( - _ffi.cast('void*', buf_base + ary_intf.get("offset", 0)), - obj.nbytes, - obj) - - # }}} - - # {{{ fall back to the old CPython buffer protocol API - - from pyopencl._buffers import Py_buffer, PyBUF_ANY_CONTIGUOUS, PyBUF_WRITABLE - - flags = PyBUF_ANY_CONTIGUOUS - if writable: - flags |= PyBUF_WRITABLE - - with Py_buffer.from_object(obj, flags) as buf: - return _ffi.cast('void*', buf.buf), buf.len, obj - - # }}} - -else: - # Py2.6 and below - - import ctypes - try: - # Python 2.6 doesn't have this. 
- _ssize_t = ctypes.c_ssize_t - except AttributeError: - _ssize_t = ctypes.c_size_t - - def _c_buffer_from_obj(obj, writable=False, retain=False): - # {{{ fall back to the old CPython buffer protocol API - - addr = ctypes.c_void_p() - length = _ssize_t() - - try: - if writable: - ctypes.pythonapi.PyObject_AsWriteBuffer( - ctypes.py_object(obj), ctypes.byref(addr), - ctypes.byref(length)) - else: - ctypes.pythonapi.PyObject_AsReadBuffer( - ctypes.py_object(obj), ctypes.byref(addr), - ctypes.byref(length)) - - # ctypes check exit status of these, so no need to check - # for errors. - except TypeError: - raise LogicError(routine=None, code=status_code.INVALID_VALUE, - msg=("un-sized (pure-Python) types not " - "acceptable as arguments")) - # }}} - - return _ffi.cast('void*', addr.value), length.value, obj - -# }}} - - -# {{{ Buffer - -class Buffer(MemoryObject): - _id = 'buffer' - - def __init__(self, context, flags, size=0, hostbuf=None): - MemoryObject.__init__(self, hostbuf) - c_hostbuf, hostbuf_size, retained_buf = self._handle_buf_flags(flags) - if hostbuf is not None: - if size > hostbuf_size: - raise RuntimeError("Specified size is greater than host " - "buffer size", - status_code.INVALID_VALUE, "Buffer") - if size == 0: - size = hostbuf_size - - ptr_buffer = _ffi.new('clobj_t*') - _handle_error(_lib.create_buffer( - ptr_buffer, context.ptr, flags, size, c_hostbuf)) - self.ptr = ptr_buffer[0] - - def get_sub_region(self, origin, size, flags=0): - _sub_buf = _ffi.new('clobj_t*') - _handle_error(_lib.buffer__get_sub_region(_sub_buf, self.ptr, origin, - size, flags)) - sub_buf = self._create(_sub_buf[0]) - MemoryObject.__init__(sub_buf, None) - return sub_buf - - def __getitem__(self, idx): - if not isinstance(idx, slice): - raise TypeError("buffer subscript must be a slice object") - - start, stop, stride = idx.indices(self.size) - if stride != 1: - raise ValueError("Buffer slice must have stride 1", - status_code.INVALID_VALUE, "Buffer.__getitem__") - - assert 
start <= stop - - size = stop - start - return self.get_sub_region(start, size) - -# }}} - - -# {{{ SVMAllocation - -class SVMAllocation(object): - """An object whose lifetime is tied to an allocation of shared virtual memory. - - .. note:: - - Most likely, you will not want to use this directly, but rather - :func:`svm_empty` and related functions which allow access to this - functionality using a friendlier, more Pythonic interface. - - .. versionadded:: 2016.2 - - .. automethod:: __init__(self, ctx, size, alignment, flags=None) - .. automethod:: release - .. automethod:: enqueue_release - """ - def __init__(self, ctx, size, alignment, flags, _interface=None): - """ - :arg ctx: a :class:`Context` - :arg flags: some of :class:`svm_mem_flags`. - """ - - self.ptr = None - - ptr = _ffi.new('void**') - _handle_error(_lib.svm_alloc( - ctx.ptr, flags, size, alignment, - ptr)) - - self.ctx = ctx - self.ptr = ptr[0] - self.is_fine_grain = flags & svm_mem_flags.SVM_FINE_GRAIN_BUFFER - - if _interface is not None: - read_write = ( - flags & mem_flags.WRITE_ONLY != 0 - or flags & mem_flags.READ_WRITE != 0) - _interface["data"] = ( - int(_ffi.cast("intptr_t", self.ptr)), not read_write) - self.__array_interface__ = _interface - - def __del__(self): - if self.ptr is not None: - self.release() - - def release(self): - _handle_error(_lib.svm_free(self.ctx.ptr, self.ptr)) - self.ptr = None - - def enqueue_release(self, queue, wait_for=None): - """ - :arg flags: a combination of :class:`pyopencl.map_flags` - :returns: a :class:`pyopencl.Event` - - |std-enqueue-blurb| - """ - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_free( - ptr_event, queue.ptr, 1, self.ptr, - c_wait_for, num_wait_for)) - - self.ctx = None - self.ptr = None - - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ SVM - -# TODO add clSetKernelExecInfo - -class SVM(_CLKernelArg): - """Tags an object exhibiting the Python buffer interface 
(such as a - :class:`numpy.ndarray`) as referring to shared virtual memory. - - Depending on the features of the OpenCL implementation, the following - types of objects may be passed to/wrapped in this type: - - * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` - for any implementation of OpenCL 2.0. - - This is how coarse-grain SVM may be used from both host and device:: - - svm_ary = cl.SVM(cl.csvm_empty(ctx, 1000, np.float32, alignment=64)) - assert isinstance(svm_ary.mem, np.ndarray) - - with svm_ary.map_rw(queue) as ary: - ary.fill(17) # use from host - - prg.twice(queue, svm_ary.mem.shape, None, svm_ary) - - * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, - if the implementation supports fine-grained shared virtual memory. - This memory may directly be passed to a kernel:: - - ary = cl.fsvm_empty(ctx, 1000, np.float32) - assert isinstance(ary, np.ndarray) - - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host - - Observe how mapping (as needed in coarse-grain SVM) is no longer - necessary. - - * any :class:`numpy.ndarray` (or other Python object with a buffer - interface) if the implementation supports fine-grained *system* shared - virtual memory. - - This is how plain :mod:`numpy` arrays may directly be passed to a - kernel:: - - ary = np.zeros(1000, np.float32) - prg.twice(queue, ary.shape, None, cl.SVM(ary)) - queue.finish() # synchronize - print(ary) # access from host - - Objects of this type may be passed to kernel calls and :func:`enqueue_copy`. - Coarse-grain shared-memory *must* be mapped into host address space using - :meth:`map` before being accessed through the :mod:`numpy` interface. - - .. note:: - - This object merely serves as a 'tag' that changes the behavior - of functions to which it is passed. It has no special management - relationship to the memory it tags. 
For example, it is permissible - to grab a :mod:`numpy.array` out of :attr:`SVM.mem` of one - :class:`SVM` instance and use the array to construct another. - Neither of the tags need to be kept alive. - - .. versionadded:: 2016.2 - - .. attribute:: mem - - The wrapped object. - - .. automethod:: __init__ - .. automethod:: map - .. automethod:: map_ro - .. automethod:: map_rw - .. automethod:: as_buffer - """ - - def __init__(self, mem): - self.mem = mem - - def map(self, queue, flags, is_blocking=True, wait_for=None): - """ - :arg is_blocking: If *False*, subsequent code must wait on - :attr:`SVMMap.event` in the returned object before accessing the - mapped memory. - :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to - read-write. - :returns: an :class:`SVMMap` instance - - |std-enqueue-blurb| - """ - writable = bool( - flags & (map_flags.WRITE | map_flags.WRITE_INVALIDATE_REGION)) - c_buf, size, _ = _c_buffer_from_obj(self.mem, writable=writable) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_map( - ptr_event, queue.ptr, is_blocking, flags, - c_buf, size, - c_wait_for, num_wait_for)) - - evt = Event._create(ptr_event[0]) - return SVMMap(self, queue, evt) - - def map_ro(self, queue, is_blocking=True, wait_for=None): - """Like :meth:`map`, but with *flags* set for a read-only map.""" - - return self.map(queue, map_flags.READ, - is_blocking=is_blocking, wait_for=wait_for) - - def map_rw(self, queue, is_blocking=True, wait_for=None): - """Like :meth:`map`, but with *flags* set for a read-only map.""" - - return self.map(queue, map_flags.READ | map_flags.WRITE, - is_blocking=is_blocking, wait_for=wait_for) - - def _enqueue_unmap(self, queue, wait_for=None): - c_buf, _, _ = _c_buffer_from_obj(self.mem) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_unmap( - ptr_event, queue.ptr, - c_buf, - c_wait_for, 
num_wait_for)) - - return Event._create(ptr_event[0]) - - def as_buffer(self, ctx, flags=None): - """ - :arg ctx: a :class:`Context` - :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to - read-write. - :returns: a :class:`Buffer` corresponding to *self*. - - The memory referred to by this object must not be freed before - the returned :class:`Buffer` is released. - """ - - if flags is None: - flags = mem_flags.READ_WRITE - - return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) - - -def _enqueue_svm_memcpy(queue, dst, src, size=None, - wait_for=None, is_blocking=True): - dst_buf, dst_size, _ = _c_buffer_from_obj(dst, writable=True) - src_buf, src_size, _ = _c_buffer_from_obj(src, writable=False) - - if size is None: - size = min(dst_size, src_size) - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memcpy( - ptr_event, queue.ptr, bool(is_blocking), - dst_buf, src_buf, size, - c_wait_for, num_wait_for, - NannyEvent._handle((dst_buf, src_buf)))) - - return NannyEvent._create(ptr_event[0]) - - -def enqueue_svm_memfill(queue, dest, pattern, byte_count=None, wait_for=None): - """Fill shared virtual memory with a pattern. - - :arg dest: a Python buffer object, optionally wrapped in an :class:`SVM` object - :arg pattern: a Python buffer object (e.g. a :class:`numpy.ndarray` with the - fill pattern to be used. - :arg byte_count: The size of the memory to be fill. Defaults to the - entirety of *dest*. - - |std-enqueue-blurb| - - .. versionadded:: 2016.2 - """ - - if isinstance(dest, SVM): - dest = dest.mem - - dst_buf, dst_size, _ = _c_buffer_from_obj(dest, writable=True) - pattern_buf, pattern_size, _ = _c_buffer_from_obj(pattern, writable=False) - - if byte_count is None: - byte_count = dst_size - - # pattern is copied, no need to nanny. 
- ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memfill( - ptr_event, queue.ptr, - dst_buf, pattern_buf, pattern_size, byte_count, - c_wait_for, num_wait_for)) - - return Event._create(ptr_event[0]) - - -def enqueue_svm_migratemem(queue, svms, flags, wait_for=None): - """ - :arg svms: a collection of Python buffer objects (e.g. :mod:`numpy` - arrrays), optionally wrapped in :class:`SVM` objects. - :arg flags: a combination of :class:`mem_migration_flags` - - |std-enqueue-blurb| - - .. versionadded:: 2016.2 - - This function requires OpenCL 2.1. - """ - - svm_pointers = _ffi.new('void *', len(svms)) - sizes = _ffi.new('size_t', len(svms)) - - for i, svm in enumerate(svms): - if isinstance(svm, SVM): - svm = svm.mem - - buf, size, _ = _c_buffer_from_obj(svm, writable=False) - svm_pointers[i] = buf - sizes[i] = size - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_svm_memfill( - ptr_event, queue.ptr, - len(svms), svm_pointers, sizes, flags, - c_wait_for, num_wait_for)) - - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ SVMMap - -class SVMMap(_CLKernelArg): - """ - .. attribute:: event - - .. versionadded:: 2016.2 - - .. automethod:: release - - This class may also be used as a context manager in a ``with`` statement. - :meth:`release` will be called upon exit from the ``with`` region. - The value returned to the ``as`` part of the context manager is the - mapped Python object (e.g. a :mod:`numpy` array). - """ - def __init__(self, svm, queue, event): - self.svm = svm - self.queue = queue - self.event = event - - def __del__(self): - if self.svm is not None: - self.release() - - def __enter__(self): - return self.svm.mem - - def __exit__(self, exc_type, exc_val, exc_tb): - self.release() - - def release(self, queue=None, wait_for=None): - """ - :arg queue: a :class:`pyopencl.CommandQueue`. 
Defaults to the one - with which the map was created, if not specified. - :returns: a :class:`pyopencl.Event` - - |std-enqueue-blurb| - """ - - evt = self.svm._enqueue_unmap(self.queue) - self.svm = None - - return evt - -# }}} - - -# {{{ Program - -class CompilerWarning(UserWarning): - pass - - -def compiler_output(text): - import os - from warnings import warn - if int(os.environ.get("PYOPENCL_COMPILER_OUTPUT", "0")): - warn(text, CompilerWarning) - else: - warn("Non-empty compiler output encountered. Set the " - "environment variable PYOPENCL_COMPILER_OUTPUT=1 " - "to see more.", CompilerWarning) - - -class _Program(_Common): - _id = 'program' - - def __init__(self, *args): - if len(args) == 2: - ctx, source = args - from pyopencl.tools import is_spirv - if is_spirv(source): - self._init_il(ctx, source) - else: - self._init_source(ctx, source) - else: - self._init_binary(*args) - - def _init_source(self, context, src): - ptr_program = _ffi.new('clobj_t*') - _handle_error(_lib.create_program_with_source( - ptr_program, context.ptr, _to_cstring(src))) - self.ptr = ptr_program[0] - - def _init_il(self, context, il): - ptr_program = _ffi.new('clobj_t*') - _handle_error(_lib.create_program_with_il( - ptr_program, context.ptr, il, len(il))) - self.ptr = ptr_program[0] - - def _init_binary(self, context, devices, binaries): - if len(devices) != len(binaries): - raise RuntimeError("device and binary counts don't match", - status_code.INVALID_VALUE, - "create_program_with_binary") - - ptr_program = _ffi.new('clobj_t*') - ptr_devices, num_devices = _clobj_list(devices) - ptr_binaries = [_ffi.new('unsigned char[]', binary) - for binary in binaries] - binary_sizes = [len(b) for b in binaries] - - # TODO parameter order? 
(for clobj_list) - _handle_error(_lib.create_program_with_binary( - ptr_program, context.ptr, num_devices, ptr_devices, - ptr_binaries, binary_sizes)) - - self.ptr = ptr_program[0] - - def kind(self): - kind = _ffi.new('int*') - _handle_error(_lib.program__kind(self.ptr, kind)) - return kind[0] - - def _build(self, options=None, devices=None): - if options is None: - options = b"" - # TODO? reverse parameter order - ptr_devices, num_devices = _clobj_list(devices) - _handle_error(_lib.program__build(self.ptr, options, - num_devices, ptr_devices)) - - def get_build_info(self, device, param): - info = _ffi.new('generic_info *') - _handle_error(_lib.program__get_build_info( - self.ptr, device.ptr, param, info)) - return _generic_info_to_python(info) - - def compile(self, options="", devices=None, headers=[]): - _devs, num_devs = _clobj_list(devices) - _prgs, names = list(zip(*((prg.ptr, _to_cstring(name)) - for (name, prg) in headers))) - _handle_error(_lib.program__compile( - self.ptr, _to_cstring(options), _devs, num_devs, - _prgs, names, len(names))) - - @classmethod - def link(cls, context, programs, options="", devices=None): - _devs, num_devs = _clobj_list(devices) - _prgs, num_prgs = _clobj_list(programs) - _prg = _ffi.new('clobj_t*') - _handle_error(_lib.program__link( - _prg, context.ptr, _prgs, num_prgs, _to_cstring(options), - _devs, num_devs)) - return cls._create(_prg[0]) - - @classmethod - def create_with_builtin_kernels(cls, context, devices, kernel_names): - _devs, num_devs = _clobj_list(devices) - _prg = _ffi.new('clobj_t*') - _handle_error(_lib.program__create_with_builtin_kernels( - _prg, context.ptr, _devs, num_devs, _to_cstring(kernel_names))) - return cls._create(_prg[0]) - - def all_kernels(self): - knls = _CArray(_ffi.new('clobj_t**')) - _handle_error(_lib.program__all_kernels( - self.ptr, knls.ptr, knls.size)) - return [ - Kernel - ._create(knls.ptr[0][i]) - ._setup(self) - for i in range(knls.size[0])] - - def _get_build_logs(self): - 
build_logs = [] - for dev in self.get_info(program_info.DEVICES): - try: - log = self.get_build_info(dev, program_build_info.LOG) - except Exception: - log = "" - - build_logs.append((dev, log)) - - return build_logs - - def build(self, options_bytes, devices=None): - logger.debug("build program: start") - err = None - try: - self._build(options=options_bytes, devices=devices) - except Error as e: - msg = e.what + "\n\n" + (75 * "=" + "\n").join( - "Build on %s:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs()) - code = e.code - routine = e.routine - - err = RuntimeError( - Error._ErrorRecord( - msg=msg, - code=code, - routine=routine)) - - if err is not None: - # Python 3.2 outputs the whole list of currently active exceptions - # This serves to remove one (redundant) level from that nesting. - - logger.debug("build program: completed, error") - raise err - - logger.debug("build program: completed, success") - - message = (75 * "=" + "\n").join( - "Build on %s succeeded, but said:\n\n%s" % (dev, log) - for dev, log in self._get_build_logs() - if log is not None and log.strip()) - - if message: - if self.kind() == program_kind.SOURCE: - build_type = "From-source build" - elif self.kind() == program_kind.BINARY: - build_type = "From-binary build" - else: - build_type = "Build" - - compiler_output("%s succeeded, but resulted in non-empty logs:\n%s" - % (build_type, message)) - - return self - -# }}} - - -class LocalMemory(_CLKernelArg): - __slots__ = ('_size',) - - def __init__(self, size): - self._size = size - - @property - def size(self): - return self._size - - -# {{{ Kernel - -class Kernel(_Common): - _id = 'kernel' - - def __init__(self, program, name): - if not isinstance(program, _Program): - program = program._get_prg() - - ptr_kernel = _ffi.new('clobj_t*') - _handle_error(_lib.create_kernel(ptr_kernel, program.ptr, - _to_cstring(name))) - self.ptr = ptr_kernel[0] - - self._setup(program) - - def _setup(self, prg): - self._source = getattr(prg, 
"_source", None) - - from pyopencl.invoker import generate_enqueue_and_set_args - self._enqueue, self._set_args = generate_enqueue_and_set_args( - self.function_name, self.num_args, self.num_args, - None, - warn_about_arg_count_bug=None, - work_around_arg_count_bug=None) - - self._wg_info_cache = {} - return self - - def set_scalar_arg_dtypes(self, scalar_arg_dtypes): - self._scalar_arg_dtypes = tuple(scalar_arg_dtypes) - - # {{{ arg counting bug handling - - # For example: - # https://github.com/pocl/pocl/issues/197 - # (but Apple CPU has a similar bug) - - work_around_arg_count_bug = False - warn_about_arg_count_bug = False - - from pyopencl.characterize import has_struct_arg_count_bug - - count_bug_per_dev = [ - has_struct_arg_count_bug(dev, self.context) - for dev in self.context.devices] - - from pytools import single_valued - if any(count_bug_per_dev): - if all(count_bug_per_dev): - work_around_arg_count_bug = single_valued(count_bug_per_dev) - else: - warn_about_arg_count_bug = True - - # }}} - - from pyopencl.invoker import generate_enqueue_and_set_args - self._enqueue, self._set_args = generate_enqueue_and_set_args( - self.function_name, - len(scalar_arg_dtypes), self.num_args, - self._scalar_arg_dtypes, - warn_about_arg_count_bug=warn_about_arg_count_bug, - work_around_arg_count_bug=work_around_arg_count_bug) - - def set_args(self, *args, **kwargs): - # Need to duplicate the 'self' argument for dynamically generated method - return self._set_args(self, *args, **kwargs) - - def __call__(self, queue, global_size, local_size, *args, **kwargs): - # __call__ can't be overridden directly, so we need this - # trampoline hack. 
- return self._enqueue(self, queue, global_size, local_size, *args, **kwargs) - - def capture_call(self, filename, queue, global_size, local_size, - *args, **kwargs): - from pyopencl.capture_call import capture_kernel_call - capture_kernel_call(self, filename, queue, global_size, local_size, - *args, **kwargs) - - def _set_arg_clkernelarg(self, arg_index, arg): - if isinstance(arg, MemoryObjectHolder): - _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr)) - elif isinstance(arg, SVM): - c_buf, _, _ = _c_buffer_from_obj(arg.mem) - _handle_error(_lib.kernel__set_arg_svm_pointer( - self.ptr, arg_index, c_buf)) - elif isinstance(arg, Sampler): - _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index, - arg.ptr)) - elif isinstance(arg, LocalMemory): - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - _ffi.NULL, arg.size)) - else: - raise RuntimeError("unexpected _CLKernelArg subclass" - "dimensions", status_code.INVALID_VALUE, - "clSetKernelArg") - - def set_arg(self, arg_index, arg): - # If you change this, also change the kernel call generation logic. 
- if arg is None: - _handle_error(_lib.kernel__set_arg_null(self.ptr, arg_index)) - elif isinstance(arg, _CLKernelArg): - self._set_arg_clkernelarg(arg_index, arg) - elif _CPY2 and isinstance(arg, np.generic): - # https://github.com/numpy/numpy/issues/5381 - c_buf, size, _ = _c_buffer_from_obj(np.getbuffer(arg)) - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - c_buf, size)) - else: - c_buf, size, _ = _c_buffer_from_obj(arg) - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - c_buf, size)) - - def get_work_group_info(self, param, device): - try: - return self._wg_info_cache[param, device] - except KeyError: - pass - - info = _ffi.new('generic_info*') - _handle_error(_lib.kernel__get_work_group_info( - self.ptr, param, device.ptr, info)) - result = _generic_info_to_python(info) - - self._wg_info_cache[param, device] = result - return result - - def get_arg_info(self, idx, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.kernel__get_arg_info(self.ptr, idx, param, info)) - return _generic_info_to_python(info) - -# }}} - - -# {{{ Event - -class Event(_Common): - _id = 'event' - - def __init__(self): - pass - - def get_profiling_info(self, param): - info = _ffi.new('generic_info *') - _handle_error(_lib.event__get_profiling_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - def wait(self): - _handle_error(_lib.event__wait(self.ptr)) - - def set_callback(self, _type, cb): - def _func(status): - cb(status) - _handle_error(_lib.event__set_callback(self.ptr, _type, - _ffi.new_handle(_func))) - - -class ProfilingInfoGetter: - def __init__(self, event): - self.event = event - - def __getattr__(self, name): - info_cls = profiling_info - - try: - inf_attr = getattr(info_cls, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_profiling_info(inf_attr) - - -Event.profile = property(ProfilingInfoGetter) - - -def 
wait_for_events(wait_for): - if wait_for is None or len(wait_for) == 0: - return - _handle_error(_lib.wait_for_events(*_clobj_list(wait_for))) - - -class NannyEvent(Event): - class _Data(object): - __slots__ = ('ward', 'ref') - - def __init__(self, ward, ref): - self.ward = ward - self.ref = ref - - @classmethod - def _handle(cls, ward, ref=None): - return _ffi.new_handle(cls._Data(ward, ref)) - - def get_ward(self): - _handle = _lib.nanny_event__get_ward(self.ptr) - if _handle == _ffi.NULL: - return - return _ffi.from_handle(_handle).ward - - -class UserEvent(Event): - def __init__(self, ctx): - _evt = _ffi.new('clobj_t*') - _handle_error(_lib.create_user_event(_evt, ctx.ptr)) - self.ptr = _evt[0] - - def set_status(self, status): - _handle_error(_lib.user_event__set_status(self.ptr, status)) - -# }}} - - -# {{{ enqueue_nd_range_kernel - -def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, - global_work_offset=None, wait_for=None, - g_times_l=False): - - work_dim = len(global_work_size) - - if local_work_size is not None: - if g_times_l: - work_dim = max(work_dim, len(local_work_size)) - elif work_dim != len(local_work_size): - raise RuntimeError("global/local work sizes have differing " - "dimensions", status_code.INVALID_VALUE, - "enqueue_nd_range_kernel") - - if len(local_work_size) < work_dim: - local_work_size = (local_work_size + - (1,) * (work_dim - len(local_work_size))) - if len(global_work_size) < work_dim: - global_work_size = (global_work_size + - (1,) * (work_dim - len(global_work_size))) - if g_times_l: - global_work_size = tuple( - global_work_size[i] * local_work_size[i] - for i in range(work_dim)) - - c_global_work_offset = _ffi.NULL - if global_work_offset is not None: - if work_dim != len(global_work_offset): - raise RuntimeError("global work size and offset have differing " - "dimensions", status_code.INVALID_VALUE, - "enqueue_nd_range_kernel") - - c_global_work_offset = global_work_offset - - if local_work_size is 
None: - local_work_size = _ffi.NULL - - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_nd_range_kernel( - ptr_event, queue.ptr, kernel.ptr, work_dim, c_global_work_offset, - global_work_size, local_work_size, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ enqueue_task - -def enqueue_task(queue, kernel, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_task( - _event, queue.ptr, kernel.ptr, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_marker_* - -def _enqueue_marker_with_wait_list(queue, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_marker_with_wait_list( - ptr_event, queue.ptr, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - -def _enqueue_marker(queue): - ptr_event = _ffi.new('clobj_t*') - _handle_error(_lib.enqueue_marker(ptr_event, queue.ptr)) - return Event._create(ptr_event[0]) - -# }}} - - -# {{{ _enqueue_barrier_* - -def _enqueue_barrier_with_wait_list(queue, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_barrier_with_wait_list( - ptr_event, queue.ptr, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - -def _enqueue_barrier(queue): - _handle_error(_lib.enqueue_barrier(queue.ptr)) - -# }}} - - -# {{{ enqueue_migrate_mem_object* - -def enqueue_migrate_mem_objects(queue, mem_objects, flags, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - c_mem_objs, num_mem_objs = _clobj_list(mem_objects) - _handle_error(_lib.enqueue_migrate_mem_objects( - _event, queue.ptr, c_mem_objs, num_mem_objs, flags, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def 
enqueue_migrate_mem_object_ext(queue, mem_objects, flags, wait_for=None): - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - c_mem_objs, num_mem_objs = _clobj_list(mem_objects) - _handle_error(_lib.enqueue_migrate_mem_object_ext( - _event, queue.ptr, c_mem_objs, num_mem_objs, flags, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_wait_for_events - -def _enqueue_wait_for_events(queue, wait_for=None): - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_wait_for_events(queue.ptr, c_wait_for, - num_wait_for)) - -# }}} - - -# {{{ _enqueue_*_buffer - -def _enqueue_read_buffer(queue, mem, hostbuf, device_offset=0, - wait_for=None, is_blocking=True): - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_read_buffer( - ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking), - NannyEvent._handle(hostbuf))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, - wait_for=None, is_blocking=True): - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_write_buffer( - ptr_event, queue.ptr, mem.ptr, c_buf, size, device_offset, - c_wait_for, num_wait_for, bool(is_blocking), - NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, - dst_offset=0, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer( - ptr_event, queue.ptr, src.ptr, dst.ptr, byte_count, src_offset, - dst_offset, c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - - 
-def _enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, - host_origin, region, buffer_pitches=None, - host_pitches=None, wait_for=None, - is_blocking=True): - buffer_origin = tuple(buffer_origin) - host_origin = tuple(host_origin) - region = tuple(region) - if buffer_pitches is None: - buffer_pitches = _ffi.NULL - buffer_pitches_l = 0 - else: - buffer_pitches = tuple(buffer_pitches) - buffer_pitches_l = len(buffer_pitches) - if host_pitches is None: - host_pitches = _ffi.NULL - host_pitches_l = 0 - else: - host_pitches = tuple(host_pitches) - host_pitches_l = len(host_pitches) - - buffer_origin_l = len(buffer_origin) - host_origin_l = len(host_origin) - region_l = len(region) - if (buffer_origin_l > 3 or host_origin_l > 3 or region_l > 3 or - buffer_pitches_l > 2 or host_pitches_l > 2): - raise RuntimeError("(buffer/host)_origin, (buffer/host)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_read_buffer_rect") - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_read_buffer_rect( - _event, queue.ptr, mem.ptr, c_buf, buffer_origin, buffer_origin_l, - host_origin, host_origin_l, region, region_l, buffer_pitches, - buffer_pitches_l, host_pitches, host_pitches_l, c_wait_for, - num_wait_for, bool(is_blocking), NannyEvent._handle(hostbuf))) - return NannyEvent._create(_event[0]) - - -def _enqueue_write_buffer_rect(queue, mem, hostbuf, buffer_origin, - host_origin, region, buffer_pitches=None, - host_pitches=None, wait_for=None, - is_blocking=True): - buffer_origin = tuple(buffer_origin) - host_origin = tuple(host_origin) - region = tuple(region) - if buffer_pitches is None: - buffer_pitches = _ffi.NULL - buffer_pitches_l = 0 - else: - buffer_pitches = tuple(buffer_pitches) - buffer_pitches_l = len(buffer_pitches) - if host_pitches is None: - host_pitches = _ffi.NULL - host_pitches_l = 0 - else: - 
host_pitches = tuple(host_pitches) - host_pitches_l = len(host_pitches) - - buffer_origin_l = len(buffer_origin) - host_origin_l = len(host_origin) - region_l = len(region) - if (buffer_origin_l > 3 or host_origin_l > 3 or region_l > 3 or - buffer_pitches_l > 2 or host_pitches_l > 2): - raise RuntimeError("(buffer/host)_origin, (buffer/host)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_write_buffer_rect") - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_write_buffer_rect( - _event, queue.ptr, mem.ptr, c_buf, buffer_origin, buffer_origin_l, - host_origin, host_origin_l, region, region_l, buffer_pitches, - buffer_pitches_l, host_pitches, host_pitches_l, c_wait_for, - num_wait_for, bool(is_blocking), NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(_event[0]) - - -def _enqueue_copy_buffer_rect(queue, src, dst, src_origin, dst_origin, region, - src_pitches=None, dst_pitches=None, - wait_for=None): - src_origin = tuple(src_origin) - dst_origin = tuple(dst_origin) - region = tuple(region) - if src_pitches is None: - src_pitches = _ffi.NULL - src_pitches_l = 0 - else: - src_pitches = tuple(src_pitches) - src_pitches_l = len(src_pitches) - if dst_pitches is None: - dst_pitches = _ffi.NULL - dst_pitches_l = 0 - else: - dst_pitches = tuple(dst_pitches) - dst_pitches_l = len(dst_pitches) - src_origin_l = len(src_origin) - dst_origin_l = len(dst_origin) - region_l = len(region) - if (src_origin_l > 3 or dst_origin_l > 3 or region_l > 3 or - src_pitches_l > 2 or dst_pitches_l > 2): - raise RuntimeError("(src/dst)_origin, (src/dst)_pitches or " - "region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_buffer_rect") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer_rect( - _event, queue.ptr, src.ptr, 
dst.ptr, src_origin, src_origin_l, - dst_origin, dst_origin_l, region, region_l, src_pitches, - src_pitches_l, dst_pitches, dst_pitches_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -# PyPy bug report: https://bitbucket.org/pypy/pypy/issue/1777/unable-to-create-proper-numpy-array-from # noqa -def enqueue_map_buffer(queue, buf, flags, offset, shape, dtype, - order="C", strides=None, wait_for=None, - is_blocking=True): - dtype, shape, strides = _norm_shape_dtype(shape, dtype, order, strides, - 'enqueue_map_buffer') - byte_size = dtype.itemsize - for s in shape: - byte_size *= s - c_wait_for, num_wait_for = _clobj_list(wait_for) - _event = _ffi.new('clobj_t*') - _map = _ffi.new('clobj_t*') - _handle_error(_lib.enqueue_map_buffer(_event, _map, queue.ptr, buf.ptr, - flags, offset, byte_size, c_wait_for, - num_wait_for, bool(is_blocking))) - mmap = MemoryMap._create(_map[0], shape, dtype.str, strides) - ary = np.asarray(mmap) - ary.dtype = dtype - - return (ary, Event._create(_event[0])) - - -def _enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): - c_pattern, psize, c_ref = _c_buffer_from_obj(pattern) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_fill_buffer( - _event, queue.ptr, mem.ptr, c_pattern, psize, offset, size, - c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ _enqueue_*_image - -def _enqueue_read_image(queue, mem, origin, region, hostbuf, row_pitch=0, - slice_pitch=0, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_read_image") - c_buf, size, _ = _c_buffer_from_obj(hostbuf, writable=True) - ptr_event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - # TODO check buffer size - 
_handle_error(_lib.enqueue_read_image( - ptr_event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, - c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), NannyEvent._handle(hostbuf))) - return NannyEvent._create(ptr_event[0]) - - -def _enqueue_copy_image(queue, src, dest, src_origin, dest_origin, region, - wait_for=None): - src_origin = tuple(src_origin) - region = tuple(region) - src_origin_l = len(src_origin) - dest_origin_l = len(dest_origin) - region_l = len(region) - if src_origin_l > 3 or dest_origin_l > 3 or region_l > 3: - raise RuntimeError("(src/dest)origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_copy_image") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_image( - _event, queue.ptr, src.ptr, dest.ptr, src_origin, src_origin_l, - dest_origin, dest_origin_l, region, region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_write_image(queue, mem, origin, region, hostbuf, row_pitch=0, - slice_pitch=0, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_write_image") - c_buf, size, c_ref = _c_buffer_from_obj(hostbuf, retain=True) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - # TODO: check buffer size - _handle_error(_lib.enqueue_write_image( - _event, queue.ptr, mem.ptr, origin, origin_l, region, region_l, - c_buf, row_pitch, slice_pitch, c_wait_for, num_wait_for, - bool(is_blocking), NannyEvent._handle(hostbuf, c_ref))) - return NannyEvent._create(_event[0]) - - -def enqueue_map_image(queue, img, flags, origin, region, shape, dtype, - order="C", strides=None, wait_for=None, is_blocking=True): - origin = tuple(origin) - region = 
tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, "enqueue_map_image") - dtype, shape, strides = _norm_shape_dtype(shape, dtype, order, strides, - 'enqueue_map_image') - _event = _ffi.new('clobj_t*') - _map = _ffi.new('clobj_t*') - _row_pitch = _ffi.new('size_t*') - _slice_pitch = _ffi.new('size_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_map_image(_event, _map, queue.ptr, img.ptr, - flags, origin, origin_l, region, - region_l, _row_pitch, _slice_pitch, - c_wait_for, num_wait_for, is_blocking)) - mmap = MemoryMap._create(_map[0], shape, dtype.str, strides) - ary = np.asarray(mmap) - ary.dtype = dtype - return (ary, Event._create(_event[0]), _row_pitch[0], _slice_pitch[0]) - - -def enqueue_fill_image(queue, img, color, origin, region, wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - color_l = len(color) - if origin_l > 3 or region_l > 3 or color_l > 4: - raise RuntimeError("origin, region or color has too many components", - status_code.INVALID_VALUE, "enqueue_fill_image") - color = np.array(color).astype(img._fill_type) - c_color = _ffi.cast('void*', color.__array_interface__['data'][0]) - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_fill_image(_event, queue.ptr, img.ptr, - c_color, origin, origin_l, region, - region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_copy_image_to_buffer(queue, src, dest, origin, region, offset, - wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_image_to_buffer") - _event = 
_ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_image_to_buffer( - _event, queue.ptr, src.ptr, dest.ptr, origin, origin_l, region, - region_l, offset, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - - -def _enqueue_copy_buffer_to_image(queue, src, dest, offset, origin, region, - wait_for=None): - origin = tuple(origin) - region = tuple(region) - origin_l = len(origin) - region_l = len(region) - if origin_l > 3 or region_l > 3: - raise RuntimeError("origin or region has too many components", - status_code.INVALID_VALUE, - "enqueue_copy_buffer_to_image") - _event = _ffi.new('clobj_t*') - c_wait_for, num_wait_for = _clobj_list(wait_for) - _handle_error(_lib.enqueue_copy_buffer_to_image( - _event, queue.ptr, src.ptr, dest.ptr, offset, origin, origin_l, - region, region_l, c_wait_for, num_wait_for)) - return Event._create(_event[0]) - -# }}} - - -# {{{ gl interop - -def have_gl(): - return bool(_lib.have_gl()) - - -class _GLObject(object): - def get_gl_object_info(self): - otype = _ffi.new('cl_gl_object_type*') - gl_name = _ffi.new('GLuint*') - _handle_error(_lib.get_gl_object_info(self.ptr, otype, gl_name)) - return otype[0], gl_name[0] - - -class GLBuffer(MemoryObject, _GLObject): - _id = 'gl_buffer' - - def __init__(self, context, flags, bufobj): - MemoryObject.__init__(self) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_buffer( - ptr, context.ptr, flags, bufobj)) - self.ptr = ptr[0] - - -class GLRenderBuffer(MemoryObject, _GLObject): - _id = 'gl_renderbuffer' - - def __init__(self, context, flags, bufobj): - MemoryObject.__init__(self, bufobj) - c_buf, bufsize, retained = self._handle_buf_flags(flags) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_renderbuffer( - ptr, context.ptr, flags, c_buf)) - self.ptr = ptr[0] - - -def _create_gl_enqueue(what): - def enqueue_gl_objects(queue, mem_objects, wait_for=None): - ptr_event = _ffi.new('clobj_t*') - c_wait_for, 
num_wait_for = _clobj_list(wait_for) - c_mem_objects, num_mem_objects = _clobj_list(mem_objects) - _handle_error(what(ptr_event, queue.ptr, c_mem_objects, num_mem_objects, - c_wait_for, num_wait_for)) - return Event._create(ptr_event[0]) - return enqueue_gl_objects - - -if _lib.have_gl(): - enqueue_acquire_gl_objects = _create_gl_enqueue( - _lib.enqueue_acquire_gl_objects) - enqueue_release_gl_objects = _create_gl_enqueue( - _lib.enqueue_release_gl_objects) - try: - get_apple_cgl_share_group = _lib.get_apple_cgl_share_group - except AttributeError: - pass - -# }}} - - -def _cffi_property(_name=None, read=True, write=True): - def _deco(get_ptr): - name = _name if _name else get_ptr.__name__ - return property((lambda self: getattr(get_ptr(self), name)) if read - else (lambda self: None), - (lambda self, v: setattr(get_ptr(self), name, v)) - if write else (lambda self, v: None)) - return _deco - - -# {{{ ImageFormat - -class ImageFormat(object): - # Hack around fmt.__dict__ check in test_wrapper.py - __dict__ = {} - __slots__ = ('ptr',) - - def __init__(self, channel_order=0, channel_type=0): - self.ptr = _ffi.new("cl_image_format*") - self.channel_order = channel_order - self.channel_data_type = channel_type - - @_cffi_property('image_channel_order') - def channel_order(self): - return self.ptr - - @_cffi_property('image_channel_data_type') - def channel_data_type(self): - return self.ptr - - @property - def channel_count(self): - try: - return { - channel_order.R: 1, - channel_order.A: 1, - channel_order.RG: 2, - channel_order.RA: 2, - channel_order.RGB: 3, - channel_order.RGBA: 4, - channel_order.BGRA: 4, - channel_order.INTENSITY: 1, - channel_order.LUMINANCE: 1, - }[self.channel_order] - except KeyError: - raise LogicError("unrecognized channel order", - status_code.INVALID_VALUE, - "ImageFormat.channel_count") - - @property - def dtype_size(self): - try: - return { - channel_type.SNORM_INT8: 1, - channel_type.SNORM_INT16: 2, - channel_type.UNORM_INT8: 1, - 
channel_type.UNORM_INT16: 2, - channel_type.UNORM_SHORT_565: 2, - channel_type.UNORM_SHORT_555: 2, - channel_type.UNORM_INT_101010: 4, - channel_type.SIGNED_INT8: 1, - channel_type.SIGNED_INT16: 2, - channel_type.SIGNED_INT32: 4, - channel_type.UNSIGNED_INT8: 1, - channel_type.UNSIGNED_INT16: 2, - channel_type.UNSIGNED_INT32: 4, - channel_type.HALF_FLOAT: 2, - channel_type.FLOAT: 4, - }[self.channel_data_type] - except KeyError: - raise LogicError("unrecognized channel data type", - status_code.INVALID_VALUE, - "ImageFormat.channel_dtype_size") - - @property - def itemsize(self): - return self.channel_count * self.dtype_size - - def __repr__(self): - return "ImageFormat(%s, %s)" % ( - channel_order.to_string(self.channel_order, - ""), - channel_type.to_string(self.channel_data_type, - "")) - - def __eq__(self, other): - return (self.channel_order == other.channel_order - and self.channel_data_type == other.channel_data_type) - - def __ne__(self, other): - return not self.__eq__(other) - - def __hash__(self): - return hash((type(self), self.channel_order, self.channel_data_type)) - - -def get_supported_image_formats(context, flags, image_type): - info = _ffi.new('generic_info*') - _handle_error(_lib.context__get_supported_image_formats( - context.ptr, flags, image_type, info)) - return _generic_info_to_python(info) - -# }}} - - -# {{{ ImageDescriptor - -def _write_only_property(*arg): - return property().setter(*arg) - - -class ImageDescriptor(object): - __slots__ = ('ptr',) - - def __init__(self): - self.ptr = _ffi.new("cl_image_desc*") - - @_cffi_property() - def image_type(self): - return self.ptr - - @_cffi_property('image_array_size') - def array_size(self): - return self.ptr - - @_cffi_property() - def num_mip_levels(self): - return self.ptr - - @_cffi_property() - def num_samples(self): - return self.ptr - - @_write_only_property - def shape(self, shape): - sdims = len(shape) - if sdims > 3: - raise LogicError("shape has too many components", - 
status_code.INVALID_VALUE, "transfer") - desc = self.ptr - desc.image_width = shape[0] if sdims > 0 else 1 - desc.image_height = shape[1] if sdims > 1 else 1 - desc.image_depth = shape[2] if sdims > 2 else 1 - desc.image_array_size = desc.image_depth - - @_write_only_property - def pitches(self, pitches): - pdims = len(pitches) - if pdims > 2: - raise LogicError("pitches has too many components", - status_code.INVALID_VALUE, "transfer") - desc = self.ptr - desc.image_row_pitch = pitches[0] if pdims > 0 else 1 - desc.image_slice_pitch = pitches[1] if pdims > 1 else 1 - - @_write_only_property - def buffer(self, buff): - self.ptr.buffer = buff.ptr.int_ptr if buff else _ffi.NULL - -# }}} - - -# {{{ Image - -_int_dtype = ({ - 8: np.int64, - 4: np.int32, - 2: np.int16, - 1: np.int8, -})[_ffi.sizeof('int')] - -_uint_dtype = ({ - 8: np.uint64, - 4: np.uint32, - 2: np.uint16, - 1: np.uint8, -})[_ffi.sizeof('unsigned')] - -_float_dtype = ({ - 8: np.float64, - 4: np.float32, - 2: np.float16, -})[_ffi.sizeof('float')] - -_fill_dtype_dict = { - _lib.TYPE_INT: _int_dtype, - _lib.TYPE_UINT: _uint_dtype, - _lib.TYPE_FLOAT: _float_dtype, - } - - -class Image(MemoryObject): - _id = 'image' - - def __init_dispatch(self, *args): - if len(args) == 5: - # >= 1.2 - self.__init_1_2(*args) - elif len(args) == 6: - # <= 1.1 - self.__init_legacy(*args) - else: - assert False - self._fill_type = _fill_dtype_dict[_lib.image__get_fill_type(self.ptr)] - - def __init_1_2(self, context, flags, fmt, desc, hostbuf): - MemoryObject.__init__(self, hostbuf) - c_buf, size, retained_buf = self._handle_buf_flags(flags) - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_from_desc(ptr, context.ptr, flags, - fmt.ptr, desc.ptr, c_buf)) - self.ptr = ptr[0] - - def __init_legacy(self, context, flags, fmt, shape, pitches, hostbuf): - if shape is None: - raise LogicError("'shape' must be given", - status_code.INVALID_VALUE, "Image") - MemoryObject.__init__(self, hostbuf) - c_buf, size, retained_buf = 
self._handle_buf_flags(flags) - dims = len(shape) - if dims == 2: - width, height = shape - pitch = 0 - if pitches is not None: - try: - pitch, = pitches - except ValueError: - raise LogicError("invalid length of pitch tuple", - status_code.INVALID_VALUE, "Image") - - # check buffer size - if (hostbuf is not None and - max(pitch, width * fmt.itemsize) * height > size): - raise LogicError("buffer too small", - status_code.INVALID_VALUE, "Image") - - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_2d(ptr, context.ptr, flags, fmt.ptr, - width, height, pitch, c_buf)) - self.ptr = ptr[0] - elif dims == 3: - width, height, depth = shape - pitch_x, pitch_y = 0, 0 - if pitches is not None: - try: - pitch_x, pitch_y = pitches - except ValueError: - raise LogicError("invalid length of pitch tuple", - status_code.INVALID_VALUE, "Image") - - # check buffer size - if (hostbuf is not None and - (max(max(pitch_x, width * fmt.itemsize) * - height, pitch_y) * depth > size)): - raise LogicError("buffer too small", - status_code.INVALID_VALUE, "Image") - - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_image_3d( - ptr, context.ptr, flags, fmt.ptr, - width, height, depth, pitch_x, pitch_y, c_buf)) - - self.ptr = ptr[0] - else: - raise LogicError("invalid dimension", - status_code.INVALID_VALUE, "Image") - - def __init__(self, context, flags, format, shape=None, pitches=None, - hostbuf=None, is_array=False, buffer=None): - - if shape is None and hostbuf is None: - raise Error("'shape' must be passed if 'hostbuf' is not given") - - if shape is None and hostbuf is not None: - shape = hostbuf.shape - - if hostbuf is not None and not \ - (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)): - from warnings import warn - warn("'hostbuf' was passed, but no memory flags to make use of it.") - - if hostbuf is None and pitches is not None: - raise Error("'pitches' may only be given if 'hostbuf' is given") - - if context._get_cl_version() >= (1, 2) and 
get_cl_header_version() >= (1, 2): - if buffer is not None and is_array: - raise ValueError( - "'buffer' and 'is_array' are mutually exclusive") - - if len(shape) == 3: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 3D arrays") - elif is_array: - image_type = mem_object_type.IMAGE2D_ARRAY - else: - image_type = mem_object_type.IMAGE3D - - elif len(shape) == 2: - if buffer is not None: - raise TypeError( - "'buffer' argument is not supported for 2D arrays") - elif is_array: - image_type = mem_object_type.IMAGE1D_ARRAY - else: - image_type = mem_object_type.IMAGE2D - - elif len(shape) == 1: - if buffer is not None: - image_type = mem_object_type.IMAGE1D_BUFFER - elif is_array: - raise TypeError("array of zero-dimensional images not supported") - else: - image_type = mem_object_type.IMAGE1D - - else: - raise ValueError("images cannot have more than three dimensions") - - desc = ImageDescriptor() - - desc.image_type = image_type - desc.shape = shape # also sets desc.array_size - - if pitches is None: - desc.pitches = (0, 0) - else: - desc.pitches = pitches - - desc.num_mip_levels = 0 # per CL 1.2 spec - desc.num_samples = 0 # per CL 1.2 spec - desc.buffer = buffer - - self.__init_dispatch(context, flags, format, desc, hostbuf) - else: - # legacy init for CL 1.1 and older - if is_array: - raise TypeError("'is_array=True' is not supported for CL < 1.2") - # if num_mip_levels is not None: - # raise TypeError( - # "'num_mip_levels' argument is not supported for CL < 1.2") - # if num_samples is not None: - # raise TypeError( - # "'num_samples' argument is not supported for CL < 1.2") - if buffer is not None: - raise TypeError("'buffer' argument is not supported for CL < 1.2") - - self.__init_dispatch(context, flags, format, shape, - pitches, hostbuf) - - def get_image_info(self, param): - info = _ffi.new('generic_info*') - _handle_error(_lib.image__get_image_info(self.ptr, param, info)) - return _generic_info_to_python(info) - - 
@property - def shape(self): - if self.type == mem_object_type.IMAGE2D: - return (self.width, self.height) - elif self.type == mem_object_type.IMAGE3D: - return (self.width, self.height, self.depth) - else: - raise LogicError("only images have shapes") - - -class _ImageInfoGetter: - def __init__(self, event): - from warnings import warn - warn("Image.image.attr is deprecated. " - "Use Image.attr directly, instead.") - - self.event = event - - def __getattr__(self, name): - try: - inf_attr = getattr(image_info, name.upper()) - except AttributeError: - raise AttributeError("%s has no attribute '%s'" - % (type(self), name)) - else: - return self.event.get_image_info(inf_attr) - - -Image.image = property(_ImageInfoGetter) - -# }}} - - -# {{{ Sampler - -class Sampler(_Common, _CLKernelArg): - _id = 'sampler' - - def __init__(self, context, normalized_coords, addressing_mode, filter_mode): - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_sampler( - ptr, context.ptr, normalized_coords, addressing_mode, filter_mode)) - self.ptr = ptr[0] - -# }}} - - -# {{{ GLTexture - -class GLTexture(Image, _GLObject): - _id = 'gl_texture' - - def __init__(self, context, flags, texture_target, miplevel, texture, dims=None): - ptr = _ffi.new('clobj_t*') - _handle_error(_lib.create_from_gl_texture( - ptr, context.ptr, flags, texture_target, miplevel, texture)) - self.ptr = ptr[0] - -# }}} - - -# {{{ DeviceTopologyAmd - -class DeviceTopologyAmd(object): - # Hack around fmt.__dict__ check in test_wrapper.py - __dict__ = {} - __slots__ = ('ptr',) - - def __init__(self, bus=0, device=0, function=0): - self.ptr = _ffi.new("cl_device_topology_amd*") - self.bus = bus - self.device = device - self.function = function - - def _check_range(self, value, prop=None): - if (value < -127) or (value > 127): - raise ValueError("Value %s not in range [-127, 127].") - - @_cffi_property('pcie') - def _pcie(self): - return self.ptr - - @property - def bus(self): - return self._pcie.bus - - @bus.setter - 
def bus(self, value): - self._check_range(value) - self._pcie.bus = value - - @property - def device(self): - return self._pcie.device - - @device.setter - def device(self, value): - self._pcie.device = value - - @property - def function(self): - return self._pcie.function - - @function.setter - def function(self, value): - self._pcie.function = value - -# }}} - - -# {{{ get_info monkeypatchery - -def add_get_info_attrs(cls, info_method, info_class, cacheable_attrs=None): - if cacheable_attrs is None: - cacheable_attrs = [] - - def make_getinfo(info_method, info_name, info_attr): - def result(self): - return info_method(self, info_attr) - - return property(result) - - def make_cacheable_getinfo(info_method, info_name, cache_attr, info_attr): - def result(self): - try: - return getattr(self, cache_attr) - except AttributeError: - pass - - result = info_method(self, info_attr) - setattr(self, cache_attr, result) - return result - - return property(result) - - for info_name, info_value in six.iteritems(info_class.__dict__): - if info_name == "to_string" or info_name.startswith("_"): - continue - - info_lower = info_name.lower() - info_constant = getattr(info_class, info_name) - if info_name in cacheable_attrs: - cache_attr = intern("_info_cache_" + info_lower) - setattr(cls, info_lower, make_cacheable_getinfo( - info_method, info_lower, cache_attr, info_constant)) - else: - setattr(cls, info_lower, make_getinfo( - info_method, info_name, info_constant)) - - -add_get_info_attrs(Platform, Platform.get_info, platform_info), -add_get_info_attrs(Device, Device.get_info, device_info, - ["PLATFORM", "MAX_WORK_GROUP_SIZE", "MAX_COMPUTE_UNITS"]) -add_get_info_attrs(Context, Context.get_info, context_info) -add_get_info_attrs(CommandQueue, CommandQueue.get_info, command_queue_info, - ["CONTEXT", "DEVICE"]) -add_get_info_attrs(Event, Event.get_info, event_info) -add_get_info_attrs(MemoryObjectHolder, MemoryObjectHolder.get_info, mem_info) -add_get_info_attrs(Image, 
Image.get_image_info, image_info) -add_get_info_attrs(Kernel, Kernel.get_info, kernel_info) -add_get_info_attrs(Sampler, Sampler.get_info, sampler_info) - -# }}} - - -if have_gl(): - def gl_object_get_gl_object(self): - return self.get_gl_object_info()[1] - - GLBuffer.gl_object = property(gl_object_get_gl_object) - GLTexture.gl_object = property(gl_object_get_gl_object) - -# vim: foldmethod=marker diff --git a/pyopencl/characterize/__init__.py b/pyopencl/characterize/__init__.py index 26a4a688bba94c3576fc45f9e98fd8e6ef0a6e63..873e1c11c834b4c9b0dfa28837440f318b3a7b21 100644 --- a/pyopencl/characterize/__init__.py +++ b/pyopencl/characterize/__init__.py @@ -387,3 +387,33 @@ def has_struct_arg_count_bug(dev, ctx=None): return "pocl" return False + + +def _may_have_svm(dev): + has_svm = (dev.platform._get_cl_version() >= (2, 0) and + cl.get_cl_header_version() >= (2, 0)) + + if dev.platform.name == "Portable Computing Language": + has_svm = ( + get_pocl_version(dev.platform) >= (1, 0) + and cl.get_cl_header_version() >= (2, 0)) + + return has_svm + + +def has_coarse_grain_buffer_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.COARSE_GRAIN_BUFFER)) + + +def has_fine_grain_buffer_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.FINE_GRAIN_BUFFER)) + + +def has_fine_grain_system_svm(dev): + return (_may_have_svm(dev) and + bool(dev.svm_capabilities + & cl.device_svm_capabilities.FINE_GRAIN_SYSTEM)) diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py index 8cad3f258c0036f24fd5f95e34b3b512d4f61542..b580c5375e298ff5d5864c52cebd656af42eac89 100644 --- a/pyopencl/invoker.py +++ b/pyopencl/invoker.py @@ -28,7 +28,7 @@ import sys import numpy as np from warnings import warn -from pyopencl._cffi import ffi as _ffi +import pyopencl._cl as _cl from pytools.persistent_dict import WriteOncePersistentDict from pyopencl.tools import _NumpyTypesKeyBuilder @@ -44,7 +44,7 @@ 
_size_t_char = ({ 4: 'L', 2: 'H', 1: 'B', -})[_ffi.sizeof('size_t')] +})[_cl._sizeof_size_t()] _type_char_map = { 'n': _size_t_char.lower(), 'N': _size_t_char @@ -59,27 +59,24 @@ del _size_t_char def generate_buffer_arg_setter(gen, arg_idx, buf_var): from pytools.py_codegen import Indentation - if _CPY2: + if _CPY2 or _PYPY: # https://github.com/numpy/numpy/issues/5381 gen("if isinstance({buf_var}, np.generic):".format(buf_var=buf_var)) with Indentation(gen): - gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) + if _PYPY: + gen("{buf_var} = np.asarray({buf_var})".format(buf_var=buf_var)) + else: + gen("{buf_var} = np.getbuffer({buf_var})".format(buf_var=buf_var)) gen(""" - c_buf, sz, _ = _cl._c_buffer_from_obj({buf_var}) - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, c_buf, sz) - if status != _ffi.NULL: - _handle_error(status) + self._set_arg_buf({arg_idx}, {buf_var}) """ .format(arg_idx=arg_idx, buf_var=buf_var)) def generate_bytes_arg_setter(gen, arg_idx, buf_var): gen(""" - status = _lib.kernel__set_arg_buf(self.ptr, {arg_idx}, - {buf_var}, len({buf_var})) - if status != _ffi.NULL: - _handle_error(status) + self._set_arg_buf({arg_idx}, {buf_var}) """ .format(arg_idx=arg_idx, buf_var=buf_var)) @@ -89,11 +86,9 @@ def generate_generic_arg_handler(gen, arg_idx, arg_var): gen(""" if {arg_var} is None: - status = _lib.kernel__set_arg_null(self.ptr, {arg_idx}) - if status != _ffi.NULL: - _handle_error(status) - elif isinstance({arg_var}, _cl._CLKernelArg): - self._set_arg_clkernelarg({arg_idx}, {arg_var}) + self._set_arg_null({arg_idx}) + elif isinstance({arg_var}, _KERNEL_ARG_CLASSES): + self.set_arg({arg_idx}, {arg_var}) """ .format(arg_idx=arg_idx, arg_var=arg_var)) @@ -289,10 +284,8 @@ def wrap_in_error_handler(body, arg_names): def add_local_imports(gen): gen("import numpy as np") - gen("import pyopencl.cffi_cl as _cl") - gen( - "from pyopencl.cffi_cl import _lib, " - "_ffi, _handle_error, _CLKernelArg") + gen("import pyopencl._cl as 
_cl") + gen("from pyopencl import _KERNEL_ARG_CLASSES") gen("") @@ -359,7 +352,7 @@ def _generate_enqueue_and_set_args_module(function_name, invoker_cache = WriteOncePersistentDict( - "pyopencl-invoker-cache-v1", + "pyopencl-invoker-cache-v6", key_builder=_NumpyTypesKeyBuilder()) diff --git a/pyopencl/mempool.py b/pyopencl/mempool.py deleted file mode 100644 index 6b1740ec3bedec01047d29bae52c53deee0edb21..0000000000000000000000000000000000000000 --- a/pyopencl/mempool.py +++ /dev/null @@ -1,275 +0,0 @@ -from __future__ import division -from __future__ import absolute_import -import six - -__copyright__ = """ -Copyright (C) 2014 Andreas Kloeckner -""" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-""" - - -import numpy as np -import pyopencl as cl -from pyopencl.tools import bitlog2 - - -# {{{ allocators - -class AllocatorBase(object): - def __call__(self, nbytes): - try_count = 0 - - while try_count < 2: - try: - return self.allocate(nbytes) - except cl.Error as e: - if not e.is_out_of_memory(): - raise - try_count += 1 - if try_count == 2: - raise - - self.try_release_blocks() - - def try_release_blocks(self): - import gc - gc.collect() - - def free(self, buf): - buf.release() - - -class DeferredAllocator(AllocatorBase): - is_deferred = True - - def __init__(self, context, mem_flags=cl.mem_flags.READ_WRITE): - self.context = context - self.mem_flags = mem_flags - - def allocate(self, nbytes): - return cl.Buffer(self.context, self.mem_flags, nbytes) - - -_zero = np.array([0, 0, 0, 0], dtype=np.int8) - - -class ImmediateAllocator(AllocatorBase): - is_deferred = False - - def __init__(self, queue, mem_flags=cl.mem_flags.READ_WRITE): - self.context = queue.context - self.queue = queue - self.mem_flags = mem_flags - - def allocate(self, nbytes): - buf = cl.Buffer(self.context, self.mem_flags, nbytes) - - # Make sure the buffer gets allocated right here and right now. - # This looks (and is) expensive. But immediate allocators - # have their main use in memory pools, whose basic assumption - # is that allocation is too expensive anyway--but they rely - # on exact 'out-of-memory' information. - - from pyopencl.cffi_cl import _enqueue_write_buffer - _enqueue_write_buffer( - self.queue, buf, - _zero[:min(len(_zero), nbytes)], - is_blocking=False) - - # No need to wait for completion here. clWaitForEvents (e.g.) - # cannot return mem object allocation failures. This implies that - # the buffer is faulted onto the device on enqueue. 
- - return buf - -# }}} - - -# {{{ memory pool - -class MemoryPool(object): - mantissa_bits = 2 - mantissa_mask = (1 << mantissa_bits) - 1 - - def __init__(self, allocator): - self.allocator = allocator - - self.bin_nr_to_bin = {} - - if self.allocator.is_deferred: - from warnings import warn - warn("Memory pools expect non-deferred " - "semantics from their allocators. You passed a deferred " - "allocator, i.e. an allocator whose allocations can turn out to " - "be unavailable long after allocation.", statcklevel=2) - - self.active_blocks = 0 - - self.stop_holding_flag = False - - @classmethod - def bin_number(cls, size): - bl2 = bitlog2(size) - - mantissa_bits = cls.mantissa_bits - if bl2 >= mantissa_bits: - shifted = size >> (bl2 - mantissa_bits) - else: - shifted = size << (mantissa_bits - bl2) - - assert not (size and (shifted & (1 << mantissa_bits)) == 0) - - chopped = shifted & cls.mantissa_mask - - return bl2 << mantissa_bits | chopped - - @classmethod - def alloc_size(cls, bin_nr): - mantissa_bits = cls.mantissa_bits - - exponent = bin_nr >> mantissa_bits - mantissa = bin_nr & cls.mantissa_mask - - exp_minus_mbits = exponent-mantissa_bits - if exp_minus_mbits >= 0: - ones = (1 << exp_minus_mbits) - 1 - head = ((1 << mantissa_bits) | mantissa) << exp_minus_mbits - else: - ones = 0 - head = ((1 << mantissa_bits) | mantissa) >> -exp_minus_mbits - - assert not (ones & head) - return head | ones - - def stop_holding(self): - self.stop_holding_flag = True - self.free_held() - - def free_held(self): - for bin_nr, bin_list in six.iteritems(self.bin_nr_to_bin): - while bin_list: - self.allocator.free(bin_list.pop()) - - @property - def held_blocks(self): - return sum( - len(bin_list) - for bin_list in six.itervalues(self.bin_nr_to_bin)) - - def allocate(self, size): - bin_nr = self.bin_number(size) - bin_list = self.bin_nr_to_bin.setdefault(bin_nr, []) - - alloc_sz = self.alloc_size(bin_nr) - - if bin_list: - # if (m_trace) - # std::cout - # << "[pool] allocation 
of size " << size - # << " served from bin " << bin_nr - # << " which contained " << bin_list.size() - # << " entries" << std::endl; - self.active_blocks += 1 - return PooledBuffer(self, bin_list.pop(), alloc_sz) - - assert self.bin_number(alloc_sz) == bin_nr - - # if (m_trace) - # std::cout << "[pool] allocation of size " << size - # << " required new memory" << std::endl; - - try: - result = self.allocator(alloc_sz) - self.active_blocks += 1 - return PooledBuffer(self, result, alloc_sz) - except cl.MemoryError: - pass - - # if (m_trace) - # std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; - - self.allocator.try_release_blocks() - - if bin_list: - return bin_list.pop() - - # if (m_trace) - # std::cout << "[pool] allocation still OOM after GC" << std::endl; - - for _ in self._try_to_free_memory(): - try: - result = self.allocator(alloc_sz) - self.active_blocks += 1 - return PooledBuffer(self, result, alloc_sz) - except cl.MemoryError: - pass - - raise cl.MemoryError( - "failed to free memory for allocation", - routine="memory_pool::allocate", - code=cl.status_code.MEM_OBJECT_ALLOCATION_FAILURE) - - __call__ = allocate - - def free(self, buf, size): - self.active_blocks -= 1 - bin_nr = self.bin_number(size) - - if not self.stop_holding_flag: - self.bin_nr_to_bin.setdefault(bin_nr, []).append(buf) - - # if (m_trace) - # std::cout << "[pool] block of size " << size << " returned to bin " - # << bin_nr << " which now contains " << get_bin(bin_nr).size() - # << " entries" << std::endl; - else: - self.allocator.free(buf) - - def _try_to_free_memory(self): - for bin_nr, bin_list in six.iteritems(self.bin_nr_to_bin): - while bin_list: - self.allocator.free(bin_list.pop()) - yield - - -class PooledBuffer(cl.MemoryObjectHolder): - _id = 'buffer' - - def __init__(self, pool, buf, alloc_sz): - self.pool = pool - self.buf = buf - self.ptr = buf.ptr - self._alloc_sz = alloc_sz - - def release(self): - self.pool.free(self.buf, self._alloc_sz) - self.buf = 
None - self.ptr = None - - def __del__(self): - if self.buf is not None: - self.release() - -# }}} - - -# vim: foldmethod=marker diff --git a/pyopencl/tools.py b/pyopencl/tools.py index a3c577ef88854042b249c6d83651147af9ae298f..05ccc5d079cbf42c5fe415adc190c845a15bfcac 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -35,7 +35,7 @@ import numpy as np from decorator import decorator import pyopencl as cl from pytools import memoize, memoize_method -from pyopencl.cffi_cl import _lib +from pyopencl._cl import bitlog2 # noqa: F401 from pytools.persistent_dict import KeyBuilder as KeyBuilderBase import re @@ -60,9 +60,11 @@ _register_types() # {{{ imported names -bitlog2 = _lib.bitlog2 -from pyopencl.mempool import ( # noqa - PooledBuffer, DeferredAllocator, ImmediateAllocator, MemoryPool) +from pyopencl._cl import ( # noqa + PooledBuffer as PooledBuffer, + _tools_DeferredAllocator as DeferredAllocator, + _tools_ImmediateAllocator as ImmediateAllocator, + MemoryPool as MemoryPool) # }}} diff --git a/pyopencl/version.py b/pyopencl/version.py index ddb2bc1439c4102c64a1eb14ca4ed146ea241dd3..f46939dfb75cb4e66e1c297fd8a5837099f2034f 100644 --- a/pyopencl/version.py +++ b/pyopencl/version.py @@ -1,3 +1,3 @@ -VERSION = (2018, 1, 1) +VERSION = (2018, 2) VERSION_STATUS = "" VERSION_TEXT = ".".join(str(x) for x in VERSION) + VERSION_STATUS diff --git a/setup.py b/setup.py index 1c9ca77d0b1a05716ce20caaa09f3e7ac0cbde4f..fcf668bf7532b683605da9ceb34ed5cdf215a9d6 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,82 @@ THE SOFTWARE. 
import sys from os.path import exists +import setuptools +from setuptools.command.build_ext import build_ext + + +# {{{ boilerplate from https://github.com/pybind/python_example/blob/2ed5a68759cd6ff5d2e5992a91f08616ef457b5c/setup.py # noqa + +class get_pybind_include(object): # noqa: N801 + """Helper class to determine the pybind11 include path + + The purpose of this class is to postpone importing pybind11 + until it is actually installed, so that the ``get_include()`` + method can be invoked. """ + + def __init__(self, user=False): + self.user = user + + def __str__(self): + import pybind11 + return pybind11.get_include(self.user) + + +# As of Python 3.6, CCompiler has a `has_flag` method. +# cf http://bugs.python.org/issue26689 +def has_flag(compiler, flagname): + """Return a boolean indicating whether a flag name is supported on + the specified compiler. + """ + import tempfile + with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: + f.write('int main (int argc, char **argv) { return 0; }') + try: + compiler.compile([f.name], extra_postargs=[flagname]) + except setuptools.distutils.errors.CompileError: + return False + return True + + +def cpp_flag(compiler): + """Return the -std=c++[11/14] compiler flag. + + The c++14 is prefered over c++11 (when it is available). 
+ """ + if has_flag(compiler, '-std=c++14'): + return '-std=c++14' + elif has_flag(compiler, '-std=c++11'): + return '-std=c++11' + else: + raise RuntimeError('Unsupported compiler -- at least C++11 support ' + 'is needed!') + + +class BuildExt(build_ext): + """A custom build extension for adding compiler-specific options.""" + c_opts = { + 'msvc': ['/EHsc'], + 'unix': [], + } + + if sys.platform == 'darwin': + c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] + + def build_extensions(self): + ct = self.compiler.compiler_type + opts = self.c_opts.get(ct, []) + if ct == 'unix': + opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) + opts.append(cpp_flag(self.compiler)) + if has_flag(self.compiler, '-fvisibility=hidden'): + opts.append('-fvisibility=hidden') + elif ct == 'msvc': + opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) + for ext in self.extensions: + ext.extra_compile_args = opts + build_ext.build_extensions(self) + +# }}} def get_config_schema(): @@ -38,7 +114,11 @@ def get_config_schema(): IncludeDir, LibraryDir, Libraries, \ Switch, StringListOption - default_cxxflags = ['-std=gnu++11'] + default_cxxflags = [ + # Required for pybind11: + # https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes + "-fvisibility=hidden" + ] if 'darwin' in sys.platform: import platform @@ -100,7 +180,7 @@ def get_config_schema(): def main(): from setuptools import find_packages from aksetup_helper import (hack_distutils, get_config, setup, - check_git_submodules) + check_git_submodules, NumpyExtension) check_git_submodules() hack_distutils() @@ -133,6 +213,8 @@ def main(): conf["EXTRA_DEFINES"] = extra_defines + INCLUDE_DIRS = conf["CL_INC_DIR"] + ["pybind11/include"] # noqa: N806 + ver_dic = {} version_file = open("pyopencl/version.py") try: @@ -181,22 +263,6 @@ def main(): print("https://pypi.python.org/pypi/pyopencl") 
sys.exit(1) - # {{{ write cffi build script - - with open("cffi_build.py.in", "rt") as f: - build_script_template = f.read() - - format_args = {} - for k, v in conf.items(): - format_args[k] = repr(v) - - build_script = build_script_template.format(**format_args) - - with open("cffi_build.py", "wt") as f: - f.write(build_script) - - # }}} - setup(name="pyopencl", # metadata version=ver_dic["VERSION_TEXT"], @@ -217,7 +283,6 @@ def main(): 'Programming Language :: C++', 'Programming Language :: Python', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', @@ -230,24 +295,43 @@ def main(): # build info packages=find_packages(), + ext_modules=[ + NumpyExtension("pyopencl._cl", + [ + "src/wrap_constants.cpp", + "src/wrap_cl.cpp", + "src/wrap_cl_part_1.cpp", + "src/wrap_cl_part_2.cpp", + "src/wrap_mempool.cpp", + "src/bitlog.cpp", + ], + include_dirs=INCLUDE_DIRS + [ + get_pybind_include(), + get_pybind_include(user=True) + ], + library_dirs=conf["CL_LIB_DIR"], + libraries=conf["CL_LIBNAME"], + define_macros=list(conf["EXTRA_DEFINES"].items()), + extra_compile_args=conf["CXXFLAGS"], + extra_link_args=conf["LDFLAGS"], + language='c++', + ), + ], + setup_requires=[ + "pybind11", "numpy", - "cffi>=1.1.0", ], install_requires=[ "numpy", "pytools>=2017.6", - "pytest>=2", "decorator>=3.2.0", - "cffi>=1.1.0", "appdirs>=1.4.0", "six>=1.9.0", # "Mako>=0.3.6", ], - cffi_modules=["cffi_build.py:ffi"], - include_package_data=True, package_data={ "pyopencl": [ @@ -258,8 +342,11 @@ def main(): ] }, + cmdclass={'build_ext': BuildExt}, zip_safe=False) if __name__ == '__main__': main() + +# vim: foldmethod=marker diff --git a/src/bitlog.cpp b/src/bitlog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88b820fa362668f9af11a31c8913dbeb03052e94 --- /dev/null +++ b/src/bitlog.cpp @@ -0,0 +1,27 @@ +#include "bitlog.hpp" + 
+ + + +/* from http://graphics.stanford.edu/~seander/bithacks.html */ +const char pyopencl::log_table_8[] = +{ + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + + diff --git a/src/bitlog.hpp b/src/bitlog.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e3ffbe01fe0ae00df25102246922deaa8dbf8b2e --- /dev/null +++ b/src/bitlog.hpp @@ -0,0 +1,46 @@ +// Base-2 logarithm bithack. 
+ +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP + + +#include +#include + + +namespace pyopencl +{ + extern const char log_table_8[]; + + inline unsigned bitlog2_16(uint16_t v) + { + if (unsigned long t = v >> 8) + return 8+log_table_8[t]; + else + return log_table_8[v]; + } + + inline unsigned bitlog2_32(uint32_t v) + { + if (uint16_t t = v >> 16) + return 16+bitlog2_16(t); + else + return bitlog2_16(v); + } + + inline unsigned bitlog2(unsigned long v) + { +#if (ULONG_MAX != 4294967295) + if (uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else +#endif + return bitlog2_32(v); + } +} + + + + + +#endif diff --git a/src/c_wrapper/bitlog.cpp b/src/c_wrapper/bitlog.cpp deleted file mode 100644 index 418eb4d8f8f5ad8b2b15131b9821e9d4cb612509..0000000000000000000000000000000000000000 --- a/src/c_wrapper/bitlog.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" - -#include -#include - -/* from http://graphics.stanford.edu/~seander/bithacks.html */ -static const char log_table_8[] = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -}; - -static PYOPENCL_INLINE unsigned -bitlog2_16(uint16_t v) -{ - if (unsigned long t = v >> 8) { - return 8 + 
log_table_8[t]; - } else { - return log_table_8[v]; - } -} - -static PYOPENCL_INLINE unsigned -bitlog2_32(uint32_t v) -{ - if (uint16_t t = v >> 16) { - return 16 + bitlog2_16(t); - } else { - return bitlog2_16(v); - } -} - -unsigned -bitlog2(unsigned long v) -{ -#if (ULONG_MAX != 4294967295) - if (uint32_t t = v >> 32) { - return 32 + bitlog2_32(t); - } else { -#endif - return bitlog2_32(v); -#if (ULONG_MAX != 4294967295) - } -#endif -} diff --git a/src/c_wrapper/buffer.cpp b/src/c_wrapper/buffer.cpp deleted file mode 100644 index 70e1ff3ed1074f5ecdb9f046f70e15672e2fd9b6..0000000000000000000000000000000000000000 --- a/src/c_wrapper/buffer.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include -#include "buffer.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" - -template void print_clobj(std::ostream&, const buffer*); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE buffer* -new_buffer(cl_mem mem) -{ - return pyopencl_convert_obj(buffer, clReleaseMemObject, mem); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -PYOPENCL_USE_RESULT buffer* -buffer::get_sub_region(size_t orig, size_t size, cl_mem_flags flags) const -{ - cl_buffer_region reg = {orig, size}; - - auto mem = retry_mem_error([&] { - return pyopencl_call_guarded(clCreateSubBuffer, PYOPENCL_CL_CASTABLE_THIS, flags, - CL_BUFFER_CREATE_TYPE_REGION, ®); - }); - return new_buffer(mem); -} - -#endif - -// c wrapper - -// Buffer -error* -create_buffer(clobj_t *buffer, clobj_t _ctx, cl_mem_flags flags, - size_t size, void *hostbuf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateBuffer, ctx, - flags, size, hostbuf); - *buffer = new_buffer(mem); - }); -} - -error* -enqueue_read_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buffer, size_t size, size_t device_offset, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = 
static_cast(_queue); - auto mem = static_cast(_mem); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueReadBuffer, queue, mem, bool(block), device_offset, - size, buffer, wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_write_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const void *buffer, size_t size, size_t device_offset, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueWriteBuffer, queue, mem, bool(block), device_offset, - size, buffer, wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_copy_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, - ptrdiff_t byte_count, size_t src_offset, size_t dst_offset, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - return c_handle_error([&] { - if (byte_count < 0) { - size_t byte_count_src = 0; - size_t byte_count_dst = 0; - pyopencl_call_guarded( - clGetMemObjectInfo, src, CL_MEM_SIZE, - sizeof(byte_count), &byte_count_src, nullptr); - pyopencl_call_guarded( - clGetMemObjectInfo, src, CL_MEM_SIZE, - sizeof(byte_count), &byte_count_dst, nullptr); - byte_count = std::min(byte_count_src, byte_count_dst); - } - const auto wait_for = buf_from_class(_wait_for, - num_wait_for); - retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueCopyBuffer, queue, src, dst, src_offset, - dst_offset, byte_count, wait_for, event_out(evt)); - }); - }); -} - - -error* -enqueue_fill_buffer(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *pattern, - size_t psize, size_t offset, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto wait_for = 
buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - // TODO debug print pattern - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueFillBuffer, queue, mem, pattern, - psize, offset, size, wait_for, - event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueFillBuffer, "CL 1.1 and below") -#endif -} - - -// {{{ rectangular transfers - -error* -enqueue_read_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *buf, - const size_t *_buf_orig, size_t buf_orig_l, - const size_t *_host_orig, size_t host_orig_l, - const size_t *_reg, size_t reg_l, - const size_t *_buf_pitches, size_t buf_pitches_l, - const size_t *_host_pitches, size_t host_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - ConstBuffer buf_orig(_buf_orig, buf_orig_l); - ConstBuffer host_orig(_host_orig, host_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer buf_pitches(_buf_pitches, buf_pitches_l); - ConstBuffer host_pitches(_host_pitches, host_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueReadBufferRect, queue, mem, bool(block), buf_orig, - host_orig, reg, buf_pitches[0], buf_pitches[1], host_pitches[0], - host_pitches[1], buf, wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueReadBufferRect, "CL 1.0") -#endif -} - -error* -enqueue_write_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, void *buf, - const size_t *_buf_orig, size_t buf_orig_l, - const size_t *_host_orig, size_t host_orig_l, - const size_t *_reg, size_t reg_l, - const size_t *_buf_pitches, size_t buf_pitches_l, - const size_t *_host_pitches, size_t host_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ -#if 
PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto mem = static_cast(_mem); - ConstBuffer buf_orig(_buf_orig, buf_orig_l); - ConstBuffer host_orig(_host_orig, host_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer buf_pitches(_buf_pitches, buf_pitches_l); - ConstBuffer host_pitches(_host_pitches, host_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueWriteBufferRect, queue, mem, bool(block), buf_orig, - host_orig, reg, buf_pitches[0], buf_pitches[1], host_pitches[0], - host_pitches[1], buf, wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueWriteBufferRect, "CL 1.0") -#endif -} - -error* -enqueue_copy_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_orig, - size_t src_orig_l, const size_t *_dst_orig, - size_t dst_orig_l, const size_t *_reg, size_t reg_l, - const size_t *_src_pitches, size_t src_pitches_l, - const size_t *_dst_pitches, size_t dst_pitches_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - ConstBuffer src_orig(_src_orig, src_orig_l); - ConstBuffer dst_orig(_dst_orig, dst_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - ConstBuffer src_pitches(_src_pitches, src_pitches_l); - ConstBuffer dst_pitches(_dst_pitches, dst_pitches_l); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueCopyBufferRect, queue, src, dst, src_orig, dst_orig, - reg, src_pitches[0], src_pitches[1], dst_pitches[0], - dst_pitches[1], wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueCopyBufferRect, "CL 1.0") -#endif -} - -// }}} - -error* -buffer__get_sub_region(clobj_t *_sub_buf, clobj_t _buf, size_t orig, - size_t size, 
cl_mem_flags flags) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 - auto buf = static_cast(_buf); - return c_handle_error([&] { - *_sub_buf = buf->get_sub_region(orig, size, flags); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateSubBuffer, "CL 1.0") -#endif -} diff --git a/src/c_wrapper/buffer.h b/src/c_wrapper/buffer.h deleted file mode 100644 index c97a7919b56e5fda3bec2e739520f21991cbc544..0000000000000000000000000000000000000000 --- a/src/c_wrapper/buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -#include "memory_object.h" -#include "clhelper.h" - -#ifndef __PYOPENCL_BUFFER_H -#define __PYOPENCL_BUFFER_H - -// {{{ buffer - -class buffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(BUFFER); - PYOPENCL_INLINE - buffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} - -#if PYOPENCL_CL_VERSION >= 0x1010 - PYOPENCL_USE_RESULT buffer *get_sub_region(size_t orig, size_t size, - cl_mem_flags flags) const; -#endif -}; - -extern template void print_clobj(std::ostream&, const buffer*); - -// }}} - -#endif diff --git a/src/c_wrapper/clhelper.h b/src/c_wrapper/clhelper.h deleted file mode 100644 index d0aff85c329ead7956ed2621fa4c00a1c887ab7c..0000000000000000000000000000000000000000 --- a/src/c_wrapper/clhelper.h +++ /dev/null @@ -1,254 +0,0 @@ -#include "error.h" -#include "clobj.h" - -#ifndef __PYOPENCL_CLHELPER_H -#define __PYOPENCL_CLHELPER_H - -template -class _CLObjOutArg : public OutArg { - typedef typename CLObj::cl_type CLType; - clobj_t *const m_ret; - CLType m_clobj; - cl_int (CL_API_CALL *m_release)(CLType); - const char *m_name; - std::tuple m_t1; - template - PYOPENCL_INLINE CLObj* - __new_obj(seq) - { - return new CLObj(m_clobj, false, std::get(m_t1)...); - } -public: - PYOPENCL_INLINE - _CLObjOutArg(clobj_t *ret, cl_int (CL_API_CALL *release)(CLType), - const char *name, T... t1) noexcept - : m_ret(ret), m_clobj(nullptr), m_release(release), - m_name(name), m_t1(t1...) 
- { - } - PYOPENCL_INLINE - _CLObjOutArg(_CLObjOutArg &&other) noexcept - : m_ret(other.m_ret), m_clobj(other.m_clobj), - m_release(other.m_release), m_name(other.m_name) - { - std::swap(m_t1, other.m_t1); - } - PYOPENCL_INLINE typename CLObj::cl_type* - get() - { - return &m_clobj; - } - PYOPENCL_INLINE void - convert() - { - *m_ret = __new_obj(typename gens::type()); - } - PYOPENCL_INLINE void - cleanup(bool converted) - { - if (converted) { - delete *m_ret; - *m_ret = nullptr; - } else { - call_guarded_cleanup(m_release, m_name, m_clobj); - } - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) const - { - print_arg(stm, m_clobj, out); - } -}; - -template -static PYOPENCL_INLINE _CLObjOutArg -make_cloutarg(clobj_t *ret, cl_int (CL_API_CALL *release)(typename CLObj::cl_type), - const char *name, T... t1) -{ - return _CLObjOutArg(ret, release, name, t1...); -} -#define pyopencl_outarg(type, ret, func, ...) \ - make_cloutarg(ret, func, #func, ##__VA_ARGS__) - -// {{{ GetInfo helpers - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -get_vec_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - size_t size = 0; - call_guarded(func, name, args..., 0, nullptr, buf_arg(size)); - pyopencl_buf buf(size / sizeof(T)); - call_guarded(func, name, args..., size_arg(buf), buf_arg(size)); - return buf; -} -#define pyopencl_get_vec_info(type, what, ...) 
\ - get_vec_info(clGet##what##Info, "clGet" #what "Info", __VA_ARGS__) - -inline generic_info make_generic_info(class_t opaque_class, const char *type, bool free_type, void *value, bool free_value) -{ - generic_info result; - result.opaque_class = opaque_class; - result.type = type; - result.free_type = free_type; - result.value = value; - result.free_value = free_value; - return result; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_array_info(const char *tname, pyopencl_buf &buf) -{ - return make_generic_info( - CLASS_NONE, - _copy_str(std::string(tname) + "[" + tostring(buf.len()) + "]"), - true, - buf.release(), - true); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_array_info(const char *tname, pyopencl_buf &&_buf) -{ - pyopencl_buf &buf = _buf; - return convert_array_info(tname, buf); -} - -#define pyopencl_convert_array_info(type, buf) \ - convert_array_info(#type, buf) -#define pyopencl_get_array_info(type, what, ...) \ - pyopencl_convert_array_info(type, pyopencl_get_vec_info(type, what, __VA_ARGS__)) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -convert_opaque_array_info(T &&buf) -{ - return make_generic_info( - CLObj::class_id, - _copy_str(std::string("void*[") + tostring(buf.len()) + "]"), - true, - buf_to_base(std::forward(buf)).release(), - true); -} -#define pyopencl_get_opaque_array_info(cls, what, ...) \ - convert_opaque_array_info( \ - pyopencl_get_vec_info(cls::cl_type, what, __VA_ARGS__)) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_opaque_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... 
args) -{ - typename CLObj::cl_type param_value; - call_guarded(func, name, args..., size_arg(param_value), nullptr); - void *value; - if (param_value) { - value = (void*)(new CLObj(param_value, /*retain*/ true)); - } else { - value = nullptr; - } - return make_generic_info(CLObj::class_id, "void *", false, value, true); -} -#define pyopencl_get_opaque_info(clobj, what, ...) \ - get_opaque_info(clGet##what##Info, \ - "clGet" #what "Info", __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_str_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - size_t size; - call_guarded(func, name, args..., 0, nullptr, buf_arg(size)); - pyopencl_buf param_value(size); - call_guarded(func, name, args..., param_value, buf_arg(size)); - return make_generic_info(CLASS_NONE, "char*", false, (void*)param_value.release(), true); -} -#define pyopencl_get_str_info(what, ...) \ - get_str_info(clGet##what##Info, "clGet" #what "Info", __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_int_info(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - const char *tpname, ArgTypes2&&... args) -{ - T value; - call_guarded(func, name, args..., size_arg(value), nullptr); - return make_generic_info(CLASS_NONE, tpname, false, cl_memdup(&value), true); -} -#define pyopencl_get_int_info(type, what, ...) \ - get_int_info(clGet##what##Info, "clGet" #what "Info", \ - #type "*", __VA_ARGS__) - -// }}} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T* -convert_obj(cl_int (CL_API_CALL *clRelease)(CLType), const char *name, CLType cl_obj, - ArgTypes&&... args) -{ - try { - return new T(cl_obj, false, std::forward(args)...); - } catch (...) { - call_guarded_cleanup(clRelease, name, cl_obj); - throw; - } -} -#define pyopencl_convert_obj(type, func, ...) 
\ - convert_obj(func, #func, __VA_ARGS__) - -// {{{ extension function pointers - -#if PYOPENCL_CL_VERSION >= 0x1020 -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -get_ext_fun(cl_platform_id plat, const char *name, const char *err) -{ - T func = (T)clGetExtensionFunctionAddressForPlatform(plat, name); - if (!func) { - throw clerror(name, CL_INVALID_VALUE, err); - } - return func; -} -#define pyopencl_get_ext_fun(plat, name) \ - get_ext_fun(plat, #name, #name " not available") -#else -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -get_ext_fun(const char *name, const char *err) -{ - T func = (T)clGetExtensionFunctionAddress(name); - if (!func) { - throw clerror(name, CL_INVALID_VALUE, err); - } - return func; -} -#define pyopencl_get_ext_fun(plat, name) \ - get_ext_fun(#name, #name " not available") -#endif - -// }}} - -static PYOPENCL_INLINE std::ostream& -operator<<(std::ostream &stm, const cl_image_format &fmt) -{ - stm << "channel_order: " << fmt.image_channel_order - << ",\nchannel_data_type: " << fmt.image_channel_data_type; - return stm; -} - -#ifdef CL_DEVICE_TOPOLOGY_AMD -static PYOPENCL_INLINE std::ostream& -operator<<(std::ostream &stm, const cl_device_topology_amd &topol) -{ - stm << "pcie.bus: " << topol.pcie.bus - << ",\npcie.device: " << topol.pcie.device - << ",\npcie.function: " << topol.pcie.function - << ",\npcie.type: " << topol.pcie.type; - return stm; -} -#endif -#endif diff --git a/src/c_wrapper/clinfo_ext.h b/src/c_wrapper/clinfo_ext.h deleted file mode 100644 index 43b7b6082fda28ad433f26c5d9a5e2e743e24940..0000000000000000000000000000000000000000 --- a/src/c_wrapper/clinfo_ext.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Include OpenCL header, and define OpenCL extensions, since what is and is not - * available in the official headers is very system-dependent */ - -#ifndef _EXT_H -#define _EXT_H - -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) -#include -#else -#include -#endif - -/* These two defines were 
introduced in the 1.2 headers - * on 2012-11-30, so earlier versions don't have them - * (e.g. Debian wheezy) - */ - -#ifndef CL_DEVICE_IMAGE_PITCH_ALIGNMENT -#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A -#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B -#endif - -/* - * Extensions - */ - -/* cl_khr_icd */ -#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 -#define CL_PLATFORM_NOT_FOUND_KHR -1001 - - -/* cl_khr_fp64 */ -#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 - -/* cl_khr_fp16 */ -#define CL_DEVICE_HALF_FP_CONFIG 0x1033 - -/* cl_khr_terminate_context */ -#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F - -/* cl_nv_device_attribute_query */ -#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 -#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 -#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 -#define CL_DEVICE_WARP_SIZE_NV 0x4003 -#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 -#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 -#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 -#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 -#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 -#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 - -/* cl_ext_atomic_counters_{32,64} */ -#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 - -/* cl_amd_device_attribute_query */ -#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 -#define CL_DEVICE_TOPOLOGY_AMD 0x4037 -#define CL_DEVICE_BOARD_NAME_AMD 0x4038 -#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 -#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 -#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 -#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 -#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 -#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 -#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 -#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 -#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 -#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A 
-#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B -#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C - -#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD -#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 - -typedef union -{ - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; -} cl_device_topology_amd; -#endif - -/* cl_amd_offline_devices */ -#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F - -/* cl_ext_device_fission */ -#define cl_ext_device_fission 1 - -typedef cl_ulong cl_device_partition_property_ext; - -#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 -#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 -#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 -#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 /* cl_intel_device_partition_by_names */ -#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 - -#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 -#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 -#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 -#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 -#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 - -#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 -#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 -#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 -#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 -#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 -#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 - -/* cl_intel_advanced_motion_estimation */ -#define CL_DEVICE_ME_VERSION_INTEL 0x407E - -/* cl_qcom_ext_host_ptr */ -#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 -#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 - -/* cl_khr_spir */ -#define CL_DEVICE_SPIR_VERSIONS 0x40E0 - -/* cl_altera_device_temperature */ -#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3 - -/* cl_intel_simultaneous_sharing */ -#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 -#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 - -#endif diff --git a/src/c_wrapper/clobj.h b/src/c_wrapper/clobj.h deleted file mode 
100644 index 5db08710f2b10193b5cc5e8528257317f666de32..0000000000000000000000000000000000000000 --- a/src/c_wrapper/clobj.h +++ /dev/null @@ -1,149 +0,0 @@ -#include "utils.h" - -#ifndef __PYOPENCL_CLOBJ_H -#define __PYOPENCL_CLOBJ_H - -#define PYOPENCL_DEF_CL_CLASS(name) \ - constexpr static class_t class_id = CLASS_##name; \ - constexpr static const char *class_name = #name; - -struct clbase { -private: - // non-copyable - clbase(const clbase&) = delete; - clbase &operator=(const clbase&) = delete; - bool operator==(clbase const &other) const = delete; - bool operator!=(clbase const &other) const = delete; -public: - clbase() = default; - virtual ~clbase() = default; - virtual intptr_t intptr() const = 0; - virtual generic_info get_info(cl_uint) const = 0; -}; - -template -class clobj : public clbase { -private: - CLType m_obj; -public: - typedef CLType cl_type; - PYOPENCL_INLINE - clobj(CLType obj, bool=false) : m_obj(obj) - {} - PYOPENCL_INLINE const CLType& - data() const - { - return m_obj; - } - intptr_t - intptr() const - { - return (intptr_t)m_obj; - } -}; - -template -void -print_clobj(std::ostream &stm, const CLObj *obj) -{ - stm << CLObj::class_name << "(" << (const void*)obj << ")<" - << (const void*)obj->data() << ">"; -} - -template -class CLArg, - CLObj>::value> > { -private: - CLObj &m_obj; -public: - CLArg(CLObj &obj) : m_obj(obj) - { - } - PYOPENCL_INLINE const typename CLObj::cl_type& - convert() const - { - return m_obj.data(); - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - print_clobj(stm, &m_obj); - } -}; - -template -class CLArg, - CLObj>::value> > { -private: - CLObj *m_obj; -public: - CLArg(CLObj *obj) : m_obj(obj) - { - } - PYOPENCL_INLINE const typename CLObj::cl_type& - convert() const - { - return m_obj->data(); - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - print_clobj(stm, m_obj); - } -}; - -template -static PYOPENCL_INLINE CLObj* -clobj_from_int_ptr(intptr_t ptr, bool retain) -{ - return new 
CLObj(reinterpret_cast(ptr), retain); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_from_class(T2 *buf2, size_t len) -{ - pyopencl_buf buf(len); - for (size_t i = 0;i < len;i++) { - buf[i] = static_cast(buf2[i])->data(); - } - return buf; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_from_class(T2 &&buf) -{ - return buf_from_class(buf.get(), buf.len()); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_to_base(T2 *buf2, size_t len, ArgTypes&&... args) -{ - pyopencl_buf buf(len); - size_t i = 0; - try { - for (;i < len;i++) { - buf[i] = static_cast( - new T((typename T::cl_type)buf2[i], - std::forward(args)...)); - } - } catch (...) { - for (size_t j = 0;j < i;j++) { - delete buf[i]; - } - throw; - } - return buf; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE pyopencl_buf -buf_to_base(T2 &&buf2, ArgTypes&&... args) -{ - return buf_to_base(buf2.get(), buf2.len(), - std::forward(args)...); -} - -#endif diff --git a/src/c_wrapper/command_queue.cpp b/src/c_wrapper/command_queue.cpp deleted file mode 100644 index b8ecef1ee6b950b23888e37032caf632e3fe9bb4..0000000000000000000000000000000000000000 --- a/src/c_wrapper/command_queue.cpp +++ /dev/null @@ -1,132 +0,0 @@ -#include "command_queue.h" -#include "device.h" -#include "context.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, - const cl_command_queue&, bool); -template void print_clobj(std::ostream&, const command_queue*); -template void print_buf( - std::ostream&, const cl_command_queue*, size_t, ArgType, bool, bool); - -command_queue::~command_queue() -{ - pyopencl_call_guarded_cleanup(clReleaseCommandQueue, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -command_queue::get_info(cl_uint param_name) const -{ - switch ((cl_command_queue_info)param_name) { - case CL_QUEUE_CONTEXT: - return pyopencl_get_opaque_info(context, CommandQueue, - 
PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_DEVICE: - return pyopencl_get_opaque_info(device, CommandQueue, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, CommandQueue, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_QUEUE_PROPERTIES: - return pyopencl_get_int_info(cl_command_queue_properties, - CommandQueue, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("CommandQueue.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Command Queue -error* -create_command_queue(clobj_t *queue, clobj_t _ctx, - clobj_t _dev, cl_command_queue_properties props) -{ - auto ctx = static_cast(_ctx); - auto py_dev = static_cast(_dev); - return c_handle_error([&] { - cl_device_id dev; - if (py_dev) { - dev = py_dev->data(); - } else { - auto devs = pyopencl_get_vec_info(cl_device_id, Context, - ctx, CL_CONTEXT_DEVICES); - if (devs.len() == 0) { - throw clerror("CommandQueue", CL_INVALID_VALUE, - "context doesn't have any devices? 
-- " - "don't know which one to default to"); - } - dev = devs[0]; - } - cl_command_queue cl_queue = - pyopencl_call_guarded(clCreateCommandQueue, ctx, dev, props); - *queue = new command_queue(cl_queue, false); - }); -} - -error* -command_queue__finish(clobj_t queue) -{ - return c_handle_error([&] { - pyopencl_call_guarded(clFinish, static_cast(queue)); - }); -} - -error* -command_queue__flush(clobj_t queue) -{ - return c_handle_error([&] { - pyopencl_call_guarded(clFlush, static_cast(queue)); - }); -} - -error* -enqueue_marker_with_wait_list(clobj_t *evt, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto queue = static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueMarkerWithWaitList, queue, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueMarkerWithWaitList, "CL 1.2") -#endif -} - -error* -enqueue_barrier_with_wait_list(clobj_t *evt, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto queue = static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueBarrierWithWaitList, queue, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueBarrierWithWaitList, "CL 1.2") -#endif -} - -error* -enqueue_marker(clobj_t *evt, clobj_t _queue) -{ - auto queue = static_cast(_queue); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueMarker, queue, event_out(evt)); - }); -} - -error* -enqueue_barrier(clobj_t _queue) -{ - auto queue = static_cast(_queue); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueBarrier, queue); - }); -} diff --git a/src/c_wrapper/command_queue.h b/src/c_wrapper/command_queue.h deleted file mode 100644 index 
3a7c01710133f90c40e3afab58abc96f88277f86..0000000000000000000000000000000000000000 --- a/src/c_wrapper/command_queue.h +++ /dev/null @@ -1,64 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_COMMAND_QUEUE_H -#define __PYOPENCL_COMMAND_QUEUE_H - -// {{{ command_queue - -extern template class clobj; -extern template void print_arg( - std::ostream&, const cl_command_queue&, bool); -extern template void print_buf( - std::ostream&, const cl_command_queue*, size_t, ArgType, bool, bool); - -class command_queue : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(COMMAND_QUEUE); - PYOPENCL_INLINE - command_queue(cl_command_queue q, bool retain) - : clobj(q) - { - if (retain) { - pyopencl_call_guarded(clRetainCommandQueue, PYOPENCL_CL_CASTABLE_THIS); - } - } - PYOPENCL_INLINE - command_queue(const command_queue &queue) - : command_queue(queue.data(), true) - {} - ~command_queue(); - - generic_info get_info(cl_uint param_name) const; - -#if 0 - - PYOPENCL_USE_RESULT std::unique_ptr - get_context() const - { - cl_context param_value; - pyopencl_call_guarded(clGetCommandQueueInfo, this, CL_QUEUE_CONTEXT, - size_arg(param_value), nullptr); - return std::unique_ptr( - new context(param_value, /*retain*/ true)); - } - -#if PYOPENCL_CL_VERSION < 0x1010 - cl_command_queue_properties - set_property(cl_command_queue_properties prop, bool enable) const - { - cl_command_queue_properties old_prop; - pyopencl_call_guarded(clSetCommandQueueProperty, this, prop, - enable, buf_arg(old_prop)); - return old_prop; - } -#endif - -#endif -}; - -extern template void print_clobj(std::ostream&, - const command_queue*); - -// }}} - -#endif diff --git a/src/c_wrapper/context.cpp b/src/c_wrapper/context.cpp deleted file mode 100644 index 0fe48554f954e46dad0ef5561932a9cb9fdb75ff..0000000000000000000000000000000000000000 --- a/src/c_wrapper/context.cpp +++ /dev/null @@ -1,153 +0,0 @@ -#include "context.h" -#include "device.h" -#include "platform.h" -#include "clhelper.h" - -template class clobj; -template 
void print_arg(std::ostream&, const cl_context&, bool); -template void print_clobj(std::ostream&, const context*); -template void print_buf(std::ostream&, const cl_context*, - size_t, ArgType, bool, bool); - -void -context::get_version(cl_context ctx, int *major, int *minor) -{ - cl_device_id s_buff[16]; - size_t size; - pyopencl_buf d_buff(0); - cl_device_id *devs = s_buff; - pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, - 0, nullptr, buf_arg(size)); - if (PYOPENCL_UNLIKELY(!size)) { - throw clerror("Context.get_version", CL_INVALID_VALUE, - "Cannot get devices from context."); - } - if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { - d_buff.resize(size / sizeof(cl_device_id)); - devs = d_buff.get(); - } - pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, - size_arg(devs, size), buf_arg(size)); - device::get_version(devs[0], major, minor); -} - -context::~context() -{ - pyopencl_call_guarded_cleanup(clReleaseContext, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -context::get_info(cl_uint param_name) const -{ - switch ((cl_context_info)param_name) { - case CL_CONTEXT_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Context, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_CONTEXT_DEVICES: - return pyopencl_get_opaque_array_info(device, Context, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_CONTEXT_PROPERTIES: { - auto result = pyopencl_get_vec_info( - cl_context_properties, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); - pyopencl_buf py_result(result.len() / 2); - size_t i = 0; - for (;i < py_result.len();i++) { - cl_context_properties key = result[i * 2]; - if (key == 0) - break; - cl_context_properties value = result[i * 2 + 1]; - switch (key) { - case CL_CONTEXT_PLATFORM: - py_result[i] = make_generic_info( - CLASS_PLATFORM, - "void *", false, - new platform(reinterpret_cast(value)), true); - break; - -#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) -#if defined(__APPLE__) && 
defined(HAVE_GL) && !defined(PYOPENCL_APPLE_USE_CL_H) - case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: -#else - case CL_GL_CONTEXT_KHR: - case CL_EGL_DISPLAY_KHR: - case CL_GLX_DISPLAY_KHR: - case CL_WGL_HDC_KHR: - case CL_CGL_SHAREGROUP_KHR: -#endif - py_result[i] = make_generic_info( - CLASS_NONE, - "intptr_t *", false, - (void*)value, - // we do not own this object - false); - break; -#endif - default: - throw clerror("Context.get_info", CL_INVALID_VALUE, - "unknown context_property key encountered"); - } - } - py_result.resize(i); - return pyopencl_convert_array_info(generic_info, py_result); - } - -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_CONTEXT_NUM_DEVICES: - return pyopencl_get_int_info(cl_uint, Context, - PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("Context.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Context -error* -create_context(clobj_t *_ctx, const cl_context_properties *props, - cl_uint num_devices, const clobj_t *_devices) -{ - // TODO debug print properties - return c_handle_error([&] { - const auto devices = buf_from_class(_devices, num_devices); - *_ctx = new context( - pyopencl_call_guarded( - clCreateContext, - const_cast(props), - devices, nullptr, nullptr), false); - }); -} - -// Context -error* -create_context_from_type(clobj_t *_ctx, const cl_context_properties *props, - cl_device_type dev_type) -{ - // TODO debug print properties - return c_handle_error([&] { - *_ctx = new context( - pyopencl_call_guarded( - clCreateContextFromType, - const_cast(props), - dev_type, nullptr, nullptr), false); - }); -} - -error* -context__get_supported_image_formats(clobj_t _ctx, cl_mem_flags flags, - cl_mem_object_type image_type, - generic_info *out) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_uint num; - pyopencl_call_guarded(clGetSupportedImageFormats, ctx, flags, - image_type, 0, nullptr, buf_arg(num)); - pyopencl_buf formats(num); - 
pyopencl_call_guarded(clGetSupportedImageFormats, ctx, flags, - image_type, formats, buf_arg(num)); - *out = pyopencl_convert_array_info(cl_image_format, formats); - }); -} diff --git a/src/c_wrapper/context.h b/src/c_wrapper/context.h deleted file mode 100644 index 1691035d09fb5628c0bbda967c205f30a4882100..0000000000000000000000000000000000000000 --- a/src/c_wrapper/context.h +++ /dev/null @@ -1,34 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_CONTEXT_H -#define __PYOPENCL_CONTEXT_H - -// {{{ context - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_context&, bool); -extern template void print_buf(std::ostream&, const cl_context*, - size_t, ArgType, bool, bool); - -class context : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(CONTEXT); - static void get_version(cl_context ctx, int *major, int *minor); - PYOPENCL_INLINE - context(cl_context ctx, bool retain) - : clobj(ctx) - { - if (retain) { - pyopencl_call_guarded(clRetainContext, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~context(); - generic_info get_info(cl_uint param_name) const; -}; - -extern template void print_clobj(std::ostream&, const context*); - -// }}} - -#endif diff --git a/src/c_wrapper/debug.cpp b/src/c_wrapper/debug.cpp deleted file mode 100644 index a118b4687148ad8fd4cdc846cbeb34de0add14d7..0000000000000000000000000000000000000000 --- a/src/c_wrapper/debug.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "debug.h" -#include -#include -#include -#include - -std::mutex dbg_lock; - -void -dbg_print_str(std::ostream &stm, const char *str, size_t len) -{ - stm << '"'; - for (size_t i = 0;i < len;i++) { - char escaped = 0; -#define escape_char(in, out) \ - case in: \ - escaped = out; \ - break - switch (str[i]) { - escape_char('\'', '\''); - escape_char('\"', '\"'); - escape_char('\?', '\?'); - escape_char('\\', '\\'); - escape_char('\0', '0'); - escape_char('\a', 'a'); - escape_char('\b', 'b'); - escape_char('\f', 'f'); - escape_char('\r', 'r'); - 
escape_char('\v', 'v'); - default: - break; - } - if (escaped) { - stm << '\\' << escaped; - } else { - stm << str[i]; - } - } - stm << '"'; -} - -void -dbg_print_bytes(std::ostream &stm, const unsigned char *bytes, size_t len) -{ - stm << '"'; - for (size_t i = 0;i < len;i++) { - stm << "\\x" << std::hex << std::setfill('0') - << std::setw(2) << bytes[i]; - } - stm << std::dec << '"'; -} - -static PYOPENCL_INLINE bool -_get_debug_env() -{ - const char *env = getenv("PYOPENCL_DEBUG"); - const bool default_debug = DEFAULT_DEBUG; - if (!env) { - return default_debug; - } - if (strcasecmp(env, "0") == 0 || strcasecmp(env, "f") == 0 || - strcasecmp(env, "false") == 0 || strcasecmp(env, "off") == 0) { - return false; - } - if (strcasecmp(env, "1") == 0 || strcasecmp(env, "t") == 0 || - strcasecmp(env, "true") == 0 || strcasecmp(env, "on") == 0) { - return true; - } - return default_debug; -} - -bool debug_enabled = _get_debug_env(); - -int -get_debug() -{ - return (int) debug_enabled; -} - -void -set_debug(int debug) -{ - debug_enabled = (bool)debug; -} diff --git a/src/c_wrapper/debug.h b/src/c_wrapper/debug.h deleted file mode 100644 index f0700030fa9ea0255aba8d37cd5368cb7120aa93..0000000000000000000000000000000000000000 --- a/src/c_wrapper/debug.h +++ /dev/null @@ -1,33 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" -#include -#include - -#ifdef __MINGW32__ -#include "mingw-std-threads/mingw.mutex.h" -#include "mingw-std-threads/mingw.thread.h" -#endif - -#ifndef __PYOPENCL_DEBUG_H -#define __PYOPENCL_DEBUG_H - -extern bool debug_enabled; -#ifdef PYOPENCL_TRACE -#define DEFAULT_DEBUG true -#else -#define DEFAULT_DEBUG false -#endif - -#define DEBUG_ON (PYOPENCL_EXPECT(debug_enabled, DEFAULT_DEBUG)) - -extern std::mutex dbg_lock; - -void dbg_print_str(std::ostream&, const char*, size_t); -static PYOPENCL_INLINE void -dbg_print_str(std::ostream &stm, const char *str) -{ - return dbg_print_str(stm, str, strlen(str)); -} -void dbg_print_bytes(std::ostream &stm, 
const unsigned char *bytes, size_t len); - -#endif diff --git a/src/c_wrapper/device.cpp b/src/c_wrapper/device.cpp deleted file mode 100644 index 16edaf34c7be2934e6350855a1788bf1311a6641..0000000000000000000000000000000000000000 --- a/src/c_wrapper/device.cpp +++ /dev/null @@ -1,375 +0,0 @@ -#include "device.h" -#include "platform.h" - -template class clobj; -template void print_arg(std::ostream&, - const cl_device_id&, bool); -template void print_clobj(std::ostream&, const device*); -template void print_buf(std::ostream&, const cl_device_id*, - size_t, ArgType, bool, bool); - -void -device::get_version(cl_device_id dev, int *major, int *minor) -{ - cl_platform_id plat; - pyopencl_call_guarded(clGetDeviceInfo, dev, CL_DEVICE_PLATFORM, - size_arg(plat), nullptr); - platform::get_version(plat, major, minor); -} - -device::~device() -{ - if (false) { - } -#if PYOPENCL_CL_VERSION >= 0x1020 - else if (m_ref_type == REF_CL_1_2) { - pyopencl_call_guarded_cleanup(clReleaseDevice, PYOPENCL_CL_CASTABLE_THIS); - } -#endif -} - -#ifdef CL_DEVICE_TOPOLOGY_AMD -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE generic_info -get_device_topology_amd(ArgTypes&&... args) -{ - const char * tpname = "cl_device_topology_amd*"; - cl_device_topology_amd value; - const char * fname = "clGetDeviceInfo"; - call_guarded(clGetDeviceInfo, fname, args..., size_arg(value), nullptr); - return make_generic_info(CLASS_NONE, tpname, false, cl_memdup(&value), true); -} - -#define pyopencl_get_device_topology_amd(...) 
get_device_topology_amd(__VA_ARGS__) - -#endif - -generic_info -device::get_info(cl_uint param_name) const -{ -#define DEV_GET_INT_INF(TYPE) \ - pyopencl_get_int_info(TYPE, Device, PYOPENCL_CL_CASTABLE_THIS, param_name) - - switch ((cl_device_info)param_name) { - case CL_DEVICE_TYPE: - return DEV_GET_INT_INF(cl_device_type); - case CL_DEVICE_MAX_WORK_GROUP_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: - case CL_DEVICE_MAX_COMPUTE_UNITS: - case CL_DEVICE_VENDOR_ID: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_MAX_WORK_ITEM_SIZES: - return pyopencl_get_array_info(size_t, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: - - case CL_DEVICE_MAX_CLOCK_FREQUENCY: - case CL_DEVICE_ADDRESS_BITS: - case CL_DEVICE_MAX_READ_IMAGE_ARGS: - case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: - case CL_DEVICE_MAX_SAMPLERS: - case CL_DEVICE_MEM_BASE_ADDR_ALIGN: - case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_MAX_MEM_ALLOC_SIZE: - return DEV_GET_INT_INF(cl_ulong); - - case CL_DEVICE_IMAGE2D_MAX_WIDTH: - case CL_DEVICE_IMAGE2D_MAX_HEIGHT: - case CL_DEVICE_IMAGE3D_MAX_WIDTH: - case CL_DEVICE_IMAGE3D_MAX_HEIGHT: - case CL_DEVICE_IMAGE3D_MAX_DEPTH: - case CL_DEVICE_MAX_PARAMETER_SIZE: - return DEV_GET_INT_INF(size_t); - - case CL_DEVICE_IMAGE_SUPPORT: - return DEV_GET_INT_INF(cl_bool); -#ifdef CL_DEVICE_DOUBLE_FP_CONFIG - case CL_DEVICE_DOUBLE_FP_CONFIG: -#endif -#ifdef CL_DEVICE_HALF_FP_CONFIG - case CL_DEVICE_HALF_FP_CONFIG: -#endif - case CL_DEVICE_SINGLE_FP_CONFIG: - return DEV_GET_INT_INF(cl_device_fp_config); - - case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: - return DEV_GET_INT_INF(cl_device_mem_cache_type); - case 
CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: - case CL_DEVICE_GLOBAL_MEM_SIZE: - case CL_DEVICE_LOCAL_MEM_SIZE: - case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: - return DEV_GET_INT_INF(cl_ulong); - - case CL_DEVICE_MAX_CONSTANT_ARGS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_LOCAL_MEM_TYPE: - return DEV_GET_INT_INF(cl_device_local_mem_type); - case CL_DEVICE_PROFILING_TIMER_RESOLUTION: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_ENDIAN_LITTLE: - case CL_DEVICE_AVAILABLE: - case CL_DEVICE_COMPILER_AVAILABLE: - case CL_DEVICE_ERROR_CORRECTION_SUPPORT: - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_EXECUTION_CAPABILITIES: - return DEV_GET_INT_INF(cl_device_exec_capabilities); - case CL_DEVICE_QUEUE_PROPERTIES: - // same as CL_DEVICE_QUEUE_ON_HOST_PROPERTIES in 2.0 - return DEV_GET_INT_INF(cl_command_queue_properties); - - case CL_DEVICE_NAME: - case CL_DEVICE_VENDOR: - case CL_DRIVER_VERSION: - case CL_DEVICE_PROFILE: - case CL_DEVICE_VERSION: - case CL_DEVICE_EXTENSIONS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - - case CL_DEVICE_PLATFORM: - return pyopencl_get_opaque_info(platform, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: - case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: - return DEV_GET_INT_INF(cl_uint); - - case CL_DEVICE_HOST_UNIFIED_MEMORY: // deprecated in 2.0 - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_OPENCL_C_VERSION: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: - 
case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: - case CL_DEVICE_REGISTERS_PER_BLOCK_NV: - case CL_DEVICE_WARP_SIZE_NV: -#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV - case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV: -#endif -#ifdef CL_DEVICE_PCI_BUS_ID_NV - case CL_DEVICE_PCI_BUS_ID_NV: -#endif -#ifdef CL_DEVICE_PCI_SLOT_ID_NV - case CL_DEVICE_PCI_SLOT_ID_NV: -#endif - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_GPU_OVERLAP_NV: - case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: - case CL_DEVICE_INTEGRATED_MEMORY_NV: - return DEV_GET_INT_INF(cl_bool); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_DEVICE_LINKER_AVAILABLE: - return DEV_GET_INT_INF(cl_bool); - case CL_DEVICE_BUILT_IN_KERNELS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: - case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_PARENT_DEVICE: - return pyopencl_get_opaque_info(device, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PARTITION_TYPE: - case CL_DEVICE_PARTITION_PROPERTIES: - return pyopencl_get_array_info(cl_device_partition_property, - Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: - return pyopencl_get_array_info(cl_device_affinity_domain, - Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_REFERENCE_COUNT: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: - case CL_DEVICE_PRINTF_BUFFER_SIZE: - return DEV_GET_INT_INF(cl_bool); -#endif -#ifdef cl_khr_image2d_from_buffer - case CL_DEVICE_IMAGE_PITCH_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: - return 
DEV_GET_INT_INF(size_t); - case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: - return DEV_GET_INT_INF(cl_command_queue_properties); - case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_ON_DEVICE_QUEUES: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_MAX_ON_DEVICE_EVENTS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_SVM_CAPABILITIES: - return DEV_GET_INT_INF(cl_device_svm_capabilities); - case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: - return DEV_GET_INT_INF(size_t); - case CL_DEVICE_MAX_PIPE_ARGS: - case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: - case CL_DEVICE_PIPE_MAX_PACKET_SIZE: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: - case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: - case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: - return DEV_GET_INT_INF(cl_uint); -#endif -#if PYOPENCL_CL_VERSION >= 0x2010 - case CL_DEVICE_IL_VERSION: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_DEVICE_MAX_NUM_SUB_GROUPS: - return DEV_GET_INT_INF(cl_uint); - case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: - return DEV_GET_INT_INF(cl_bool); -#endif - - - // {{{ AMD dev attrs - // - // types of AMD dev attrs divined from - // https://www.khronos.org/registry/cl/api/1.2/cl.hpp -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD - case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: - return DEV_GET_INT_INF(cl_ulong); -#endif -#ifdef CL_DEVICE_TOPOLOGY_AMD - case CL_DEVICE_TOPOLOGY_AMD: - return pyopencl_get_device_topology_amd(PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD - case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: - return DEV_GET_INT_INF(cl_bool); -#endif -#ifdef CL_DEVICE_BOARD_NAME_AMD - case CL_DEVICE_BOARD_NAME_AMD: ; - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef 
CL_DEVICE_GLOBAL_FREE_MEMORY_AMD - case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: - return pyopencl_get_array_info(size_t, Device, - PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD - case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD - case CL_DEVICE_SIMD_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD - case CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - case CL_DEVICE_WAVEFRONT_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD - case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD - case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD - case CL_DEVICE_LOCAL_MEM_BANKS_AMD: -#endif -#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT - case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: -#endif -#ifdef CL_DEVICE_GFXIP_MAJOR_AMD - case CL_DEVICE_GFXIP_MAJOR_AMD: -#endif -#ifdef CL_DEVICE_GFXIP_MINOR_AMD - case CL_DEVICE_GFXIP_MINOR_AMD: -#endif -#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD - case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: -#endif - return DEV_GET_INT_INF(cl_uint); - // }}} -#ifdef CL_DEVICE_ME_VERSION_INTEL - case CL_DEVICE_ME_VERSION_INTEL: -#endif -#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM - case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: -#endif -#ifdef CL_DEVICE_PAGE_SIZE_QCOM - case CL_DEVICE_PAGE_SIZE_QCOM: -#endif -#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL - case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: -#endif - return DEV_GET_INT_INF(cl_uint); -#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL - case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: - return pyopencl_get_array_info(cl_uint, Device, PYOPENCL_CL_CASTABLE_THIS, param_name); 
-#endif -#ifdef CL_DEVICE_SPIR_VERSIONS - case CL_DEVICE_SPIR_VERSIONS: - return pyopencl_get_str_info(Device, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA - case CL_DEVICE_CORE_TEMPERATURE_ALTERA: - return DEV_GET_INT_INF(cl_int); -#endif - - default: - throw clerror("Device.get_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -PYOPENCL_USE_RESULT pyopencl_buf -device::create_sub_devices(const cl_device_partition_property *props) -{ - // TODO debug print props - cl_uint num_devices; - pyopencl_call_guarded(clCreateSubDevices, PYOPENCL_CL_CASTABLE_THIS, props, 0, nullptr, - buf_arg(num_devices)); - pyopencl_buf devices(num_devices); - pyopencl_call_guarded(clCreateSubDevices, PYOPENCL_CL_CASTABLE_THIS, props, devices, - buf_arg(num_devices)); - return buf_to_base(devices, true, device::REF_CL_1_2); -} -#endif - -// c wrapper - -error* -device__create_sub_devices(clobj_t _dev, clobj_t **_devs, - uint32_t *num_devices, - const cl_device_partition_property *props) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto dev = static_cast(_dev); - return c_handle_error([&] { - auto devs = dev->create_sub_devices(props); - *num_devices = (uint32_t)devs.len(); - *_devs = devs.release(); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clCreateSubDevices, "CL 1.2") -#endif -} diff --git a/src/c_wrapper/device.h b/src/c_wrapper/device.h deleted file mode 100644 index a14a946804f0c116a683548068960d2a19db3df2..0000000000000000000000000000000000000000 --- a/src/c_wrapper/device.h +++ /dev/null @@ -1,61 +0,0 @@ -#include "clhelper.h" - -#ifndef __PYOPENCL_DEVICE_H -#define __PYOPENCL_DEVICE_H - -// {{{ device - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_device_id&, bool); -extern template void print_buf(std::ostream&, const cl_device_id*, - size_t, ArgType, bool, bool); - -class device : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(DEVICE); - enum reference_type_t { - 
REF_NOT_OWNABLE, - REF_CL_1_2, - }; - -private: - reference_type_t m_ref_type; - -public: - static void get_version(cl_device_id dev, int *major, int *minor); - device(cl_device_id did, bool retain=false, - reference_type_t ref_type=REF_NOT_OWNABLE) - : clobj(did), m_ref_type(ref_type) - { - if (retain && ref_type != REF_NOT_OWNABLE) { - if (false) { - } -#if PYOPENCL_CL_VERSION >= 0x1020 - else if (ref_type == REF_CL_1_2) { - pyopencl_call_guarded(clRetainDevice, PYOPENCL_CL_CASTABLE_THIS); - } -#endif - - else { - throw clerror("Device", CL_INVALID_VALUE, - "cannot own references to devices when device " - "fission or CL 1.2 is not available"); - } - } - } - - ~device(); - - generic_info get_info(cl_uint param_name) const; -#if PYOPENCL_CL_VERSION >= 0x1020 - PYOPENCL_USE_RESULT pyopencl_buf - create_sub_devices(const cl_device_partition_property *props); -#endif -}; - -extern template void print_clobj(std::ostream&, const device*); - -// }}} - -#endif diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h deleted file mode 100644 index 30e985f93e7dfef3f5dafb702275e064e0b509aa..0000000000000000000000000000000000000000 --- a/src/c_wrapper/error.h +++ /dev/null @@ -1,337 +0,0 @@ -#include "wrap_cl.h" -#include "pyhelper.h" -#include "clobj.h" - -#include -#include -#include -#include -#include -#include - -#ifndef __PYOPENCL_ERROR_H -#define __PYOPENCL_ERROR_H - -// {{{ error - -// See https://github.com/inducer/pyopencl/pull/83 -#if GCC_VERSION > 50200 -#define PYOPENCL_CL_CASTABLE_THIS this -#else -#define PYOPENCL_CL_CASTABLE_THIS data() -#endif - -// discouraged, assumes 'version linearity', use PYOPENCL_UNSUPPORTED_BEFORE -#define PYOPENCL_UNSUPPORTED(ROUTINE, VERSION) \ - auto err = (error*)malloc(sizeof(error)); \ - err->routine = strdup(#ROUTINE); \ - err->msg = strdup("unsupported in " VERSION); \ - err->code = CL_INVALID_VALUE; \ - err->other = 0; \ - return err; - -#define PYOPENCL_UNSUPPORTED_BEFORE(ROUTINE, VERSION) \ - auto err = 
(error*)malloc(sizeof(error)); \ - err->routine = strdup(#ROUTINE); \ - err->msg = strdup("unsupported before " VERSION); \ - err->code = CL_INVALID_VALUE; \ - err->other = 0; \ - return err; - -class clerror : public std::runtime_error { -private: - const char *m_routine; - cl_int m_code; - -public: - clerror(const char *rout, cl_int c, const char *msg="") - : std::runtime_error(msg), m_routine(rout), m_code(c) - { - if (DEBUG_ON) { - std::lock_guard lock(dbg_lock); - std::cerr << rout << ";" << msg<< ";" << c << std::endl; - } - } - PYOPENCL_INLINE const char* - routine() const - { - return m_routine; - } - - PYOPENCL_INLINE cl_int - code() const - { - return m_code; - } - - PYOPENCL_INLINE bool - is_out_of_memory() const - { - // matches Python implementation in pyopencl/cffi_cl.py - return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE || - code() == CL_OUT_OF_RESOURCES || - code() == CL_OUT_OF_HOST_MEMORY); - } -}; - -// }}} - -// {{{ tracing and error reporting - -template -struct __CLArgGetter { - template - static PYOPENCL_INLINE auto - get(T&& clarg) -> decltype(clarg.convert()) - { - return clarg.convert(); - } -}; - -template -struct __CLFinish { - static PYOPENCL_INLINE void - call(T, bool) - { - } -}; - -template -struct __CLFinish().finish(true)))> { - static PYOPENCL_INLINE void - call(T v, bool converted) - { - v.finish(converted); - } -}; - -template -struct __CLPost { - static PYOPENCL_INLINE void - call(T) - { - } -}; - -template -struct __CLPost().post()))> { - static PYOPENCL_INLINE void - call(T v) - { - v.post(); - } -}; - -template -struct is_out_arg : std::false_type {}; - -template -struct is_out_arg::is_out> > : std::true_type {}; - -template -struct __CLPrintOut { - static PYOPENCL_INLINE void - call(T, std::ostream&) - { - } -}; - -template -struct __CLPrintOut::value> > { - static inline void - call(T v, std::ostream &stm) - { - stm << ", "; - v.print(stm, true); - } -}; - -template -struct __CLPrint { - static inline void - call(T v, 
std::ostream &stm, bool &&first) - { - if (!first) { - stm << ", "; - } else { - first = false; - } - if (is_out_arg::value) { - stm << "{out}"; - } - v.print(stm); - } -}; - -template class Caller, size_t n, typename T> -struct __CLCall { - template - static PYOPENCL_INLINE void - call(T &&t, Ts&&... ts) - { - __CLCall::call(std::forward(t), - std::forward(ts)...); - Caller(t))>::call(std::get(t), - std::forward(ts)...); - } -}; - -template class Caller, typename T> -struct __CLCall { - template - static PYOPENCL_INLINE void - call(T &&t, Ts&&... ts) - { - Caller(t))>::call(std::get<0>(t), - std::forward(ts)...); - } -}; - -template -class CLArgPack : public ArgPack { - template void - _print_trace(T &res, const char *name) - { - typename CLArgPack::tuple_base *that = this; - std::cerr << name << "("; - __CLCall<__CLPrint, sizeof...(Types) - 1, - decltype(*that)>::call(*that, std::cerr, true); - std::cerr << ") = (ret: " << res; - __CLCall<__CLPrintOut, sizeof...(Types) - 1, - decltype(*that)>::call(*that, std::cerr); - std::cerr << ")" << std::endl; - } -public: - using ArgPack::ArgPack; - template - PYOPENCL_INLINE auto - clcall(Func func, const char *name) - -> decltype(this->template call<__CLArgGetter>(func)) - { - auto res = this->template call<__CLArgGetter>(func); - if (DEBUG_ON) { - std::lock_guard lock(dbg_lock); - _print_trace(res, name); - } - return res; - } - PYOPENCL_INLINE void - finish() - { - typename CLArgPack::tuple_base *that = this; - __CLCall<__CLFinish, sizeof...(Types) - 1, - decltype(*that)>::call(*that, false); - __CLCall<__CLPost, sizeof...(Types) - 1, - decltype(*that)>::call(*that); - __CLCall<__CLFinish, sizeof...(Types) - 1, - decltype(*that)>::call(*that, true); - } -}; - -template -static PYOPENCL_INLINE CLArgPack...> -make_clargpack(Types&&... args) -{ - return CLArgPack...>(std::forward(args)...); -} - -template -static PYOPENCL_INLINE void -call_guarded(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, ArgTypes2&&... 
args) -{ - auto argpack = make_clargpack(std::forward(args)...); - cl_int status_code = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - throw clerror(name, status_code); - } - argpack.finish(); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T -call_guarded(T (CL_API_CALL *func)(ArgTypes...), const char *name, ArgTypes2&&... args) -{ - cl_int status_code = CL_SUCCESS; - auto status_arg = buf_arg(status_code); - auto argpack = make_clargpack(std::forward(args)..., status_arg); - T res = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - throw clerror(name, status_code); - } - argpack.finish(); - return res; -} -#define pyopencl_call_guarded(func, ...) \ - call_guarded(func, #func, __VA_ARGS__) - -static PYOPENCL_INLINE void -cleanup_print_error(cl_int status_code, const char *name) noexcept -{ - std::cerr << ("PyOpenCL WARNING: a clean-up operation failed " - "(dead context maybe?)") << std::endl - << name << " failed with code " << status_code << std::endl; -} - -template -static PYOPENCL_INLINE void -call_guarded_cleanup(cl_int (CL_API_CALL *func)(ArgTypes...), const char *name, - ArgTypes2&&... args) -{ - auto argpack = make_clargpack(std::forward(args)...); - cl_int status_code = argpack.clcall(func, name); - if (status_code != CL_SUCCESS) { - cleanup_print_error(status_code, name); - } else { - argpack.finish(); - } -} -#define pyopencl_call_guarded_cleanup(func, ...) 
\ - call_guarded_cleanup(func, #func, __VA_ARGS__) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE error* -c_handle_error(Func func) noexcept -{ - try { - func(); - return nullptr; - } catch (const clerror &e) { - auto err = (error*)malloc(sizeof(error)); - err->routine = strdup(e.routine()); - err->msg = strdup(e.what()); - err->code = e.code(); - err->other = 0; - return err; - } catch (const std::exception &e) { - /* non-pyopencl exceptions need to be converted as well */ - auto err = (error*)malloc(sizeof(error)); - err->other = 1; - err->msg = strdup(e.what()); - return err; - } -} - -template -static PYOPENCL_INLINE auto -retry_mem_error(Func func) -> decltype(func()) -{ - try { - return func(); - } catch (clerror &e) { - if (PYOPENCL_LIKELY(!e.is_out_of_memory()) || !py::gc()) { - throw; - } - } - return func(); -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE error* -c_handle_retry_mem_error(Func &&func) noexcept -{ - return c_handle_error([&] {retry_mem_error(std::forward(func));}); -} - -// }}} - -#endif diff --git a/src/c_wrapper/event.cpp b/src/c_wrapper/event.cpp deleted file mode 100644 index d75c3a324030f5bee50f2067f03e3c7dd40d6e9e..0000000000000000000000000000000000000000 --- a/src/c_wrapper/event.cpp +++ /dev/null @@ -1,294 +0,0 @@ -#include "event.h" -#include "command_queue.h" -#include "context.h" -#include "pyhelper.h" - -#include - -template class clobj; -template void print_arg(std::ostream&, const cl_event&, bool); -template void print_clobj(std::ostream&, const event*); -template void print_buf(std::ostream&, const cl_event*, - size_t, ArgType, bool, bool); - -class event_private { - mutable volatile std::atomic_bool m_finished; - virtual void finish() noexcept = 0; -public: - event_private() - : m_finished(false) - {} - virtual - ~event_private() - {} - void - call_finish() noexcept - { - if (m_finished.exchange(true)) - return; - finish(); - } - bool - is_finished() noexcept - { - return m_finished; - } -}; - 
-event::event(cl_event event, bool retain, event_private *p) - : clobj(event), m_p(p) -{ - if (retain) { - try { - pyopencl_call_guarded(clRetainEvent, PYOPENCL_CL_CASTABLE_THIS); - } catch (...) { - m_p->call_finish(); - delete m_p; - throw; - } - } -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -static PYOPENCL_INLINE bool -release_private_use_cb(event *evt) -{ - try { - cl_int status = 0; - pyopencl_call_guarded(clGetEventInfo, evt, - CL_EVENT_COMMAND_EXECUTION_STATUS, - size_arg(status), nullptr); - // Event Callback may not be run immediately when the event - // is already completed. - if (status <= CL_COMPLETE) - return false; - cl_context ctx; - pyopencl_call_guarded(clGetEventInfo, evt, CL_EVENT_CONTEXT, - size_arg(ctx), nullptr); - int major; - int minor; - context::get_version(ctx, &major, &minor); - return (major > 1) || (major >= 1 && minor >= 1); - } catch (const clerror &e) { - cleanup_print_error(e.code(), e.what()); - return false; - } -} -#endif - -void -event::release_private() noexcept -{ - if (!m_p) - return; - if (m_p->is_finished()) { - delete m_p; - return; - } -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - if (release_private_use_cb(this)) { - try { - event_private *p = m_p; - set_callback(CL_COMPLETE, [p] (cl_int) { - p->call_finish(); - delete p; - }); - return; - } catch (const clerror &e) { - cleanup_print_error(e.code(), e.what()); - } - } -#endif - wait(); - delete m_p; -} - -event::~event() -{ - release_private(); - pyopencl_call_guarded_cleanup(clReleaseEvent, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -event::get_info(cl_uint param_name) const -{ - switch ((cl_event_info)param_name) { - case CL_EVENT_COMMAND_QUEUE: - return pyopencl_get_opaque_info(command_queue, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_EVENT_COMMAND_TYPE: - return pyopencl_get_int_info(cl_command_type, Event, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_EVENT_COMMAND_EXECUTION_STATUS: - return 
pyopencl_get_int_info(cl_int, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_EVENT_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_EVENT_CONTEXT: - return pyopencl_get_opaque_info(context, Event, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("Event.get_info", CL_INVALID_VALUE); - } -} - -generic_info -event::get_profiling_info(cl_profiling_info param) const -{ - switch (param) { - case CL_PROFILING_COMMAND_QUEUED: - case CL_PROFILING_COMMAND_SUBMIT: - case CL_PROFILING_COMMAND_START: - case CL_PROFILING_COMMAND_END: - return pyopencl_get_int_info(cl_ulong, EventProfiling, PYOPENCL_CL_CASTABLE_THIS, param); - default: - throw clerror("Event.get_profiling_info", CL_INVALID_VALUE); - } -} - -void -event::wait() const -{ - pyopencl_call_guarded(clWaitForEvents, len_arg(data())); - if (m_p) { - m_p->call_finish(); - } -} - -class nanny_event_private : public event_private { - void *m_ward; - void finish() noexcept - { - void *ward = m_ward; - m_ward = nullptr; - py::deref(ward); - } -public: - nanny_event_private(void *ward) - : m_ward(nullptr) - { - m_ward = py::ref(ward); - } - PYOPENCL_USE_RESULT PYOPENCL_INLINE void* - get_ward() const noexcept - { - return m_ward; - } -}; - -nanny_event::nanny_event(cl_event evt, bool retain, void *ward) - : event(evt, retain, ward ? new nanny_event_private(ward) : nullptr) -{ -} - -PYOPENCL_USE_RESULT void* -nanny_event::get_ward() const noexcept -{ - return (get_p() ? 
static_cast(get_p())->get_ward() : - nullptr); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 -class user_event : public event { -public: - using event::event; - PYOPENCL_INLINE void - set_status(cl_int status) - { - pyopencl_call_guarded(clSetUserEventStatus, PYOPENCL_CL_CASTABLE_THIS, status); - } -}; -#endif - -// c wrapper - -// Event -error* -event__get_profiling_info(clobj_t _evt, cl_profiling_info param, - generic_info *out) -{ - auto evt = static_cast(_evt); - return c_handle_error([&] { - *out = evt->get_profiling_info(param); - }); -} - -error* -event__wait(clobj_t evt) -{ - return c_handle_error([&] { - static_cast(evt)->wait(); - }); -} - - -error* -event__set_callback(clobj_t _evt, cl_int type, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - auto evt = static_cast(_evt); - return c_handle_error([&] { - pyobj = py::ref(pyobj); - try { - evt->set_callback(type, [=] (cl_int status) { - py::call(pyobj, status); - py::deref(pyobj); - }); - } catch (...) 
{ - py::deref(pyobj); - } - }); -#else - PYOPENCL_UNSUPPORTED(clSetEventCallback, "CL 1.0 and below and Windows") -#endif -} - -// Nanny Event -void* -nanny_event__get_ward(clobj_t evt) -{ - return static_cast(evt)->get_ward(); -} - -error* -wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clWaitForEvents, wait_for); - }); -} - -error* -enqueue_wait_for_events(clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_error([&] { - pyopencl_call_guarded(clEnqueueWaitForEvents, queue, wait_for); - }); -} - -#if PYOPENCL_CL_VERSION >= 0x1010 - -error* -create_user_event(clobj_t *_evt, clobj_t _ctx) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto evt = pyopencl_call_guarded(clCreateUserEvent, ctx); - *_evt = pyopencl_convert_obj(user_event, clReleaseEvent, evt); - }); -} - -error* -user_event__set_status(clobj_t _evt, cl_int status) -{ - auto evt = static_cast(_evt); - return c_handle_error([&] { - evt->set_status(status); - }); -} - -#endif diff --git a/src/c_wrapper/event.h b/src/c_wrapper/event.h deleted file mode 100644 index c6d0dd4b62e43d5b48149760ef4ef099360432b0..0000000000000000000000000000000000000000 --- a/src/c_wrapper/event.h +++ /dev/null @@ -1,87 +0,0 @@ -#include "clhelper.h" -#include - -#ifndef __PYOPENCL_EVENT_H -#define __PYOPENCL_EVENT_H - -// {{{ event - -extern template class clobj; -extern template void print_arg(std::ostream&, const cl_event&, bool); -extern template void print_buf(std::ostream&, const cl_event*, - size_t, ArgType, bool, bool); - -class event_private; - -class event : public clobj { - event_private *m_p; - // return whether the event need to be released. 
- void release_private() noexcept; -protected: - PYOPENCL_INLINE event_private* - get_p() const - { - return m_p; - } -public: - PYOPENCL_DEF_CL_CLASS(EVENT); - event(cl_event event, bool retain, event_private *p=nullptr); - ~event(); - generic_info get_info(cl_uint param) const; - PYOPENCL_USE_RESULT generic_info - get_profiling_info(cl_profiling_info param) const; - void wait() const; -#if PYOPENCL_CL_VERSION >= 0x1010 && defined(PYOPENCL_HAVE_EVENT_SET_CALLBACK) - template - PYOPENCL_INLINE void - set_callback(cl_int type, Func &&_func) - { - auto func = new rm_ref_t(std::forward(_func)); - try { - pyopencl_call_guarded( - clSetEventCallback, PYOPENCL_CL_CASTABLE_THIS, type, - static_cast( - [] (cl_event, cl_int status, void *data) { - rm_ref_t *func = static_cast*>(data); - - // We won't necessarily be able to acquire the GIL inside this - // handler without deadlocking. Create a thread that *can* - // wait. - - std::thread t([func, status] () { - (*func)(status); - delete func; - }); - t.detach(); - - }), (void*)func); - } catch (...) 
{ - delete func; - throw; - } - } -#endif -}; -static PYOPENCL_INLINE auto -event_out(clobj_t *ret) -> decltype(pyopencl_outarg(event, ret, clReleaseEvent)) -{ - return pyopencl_outarg(event, ret, clReleaseEvent); -} - -extern template void print_clobj(std::ostream&, const event*); - -class nanny_event : public event { -public: - nanny_event(cl_event evt, bool retain, void *ward=nullptr); - PYOPENCL_USE_RESULT void *get_ward() const noexcept; -}; -static PYOPENCL_INLINE auto -nanny_event_out(clobj_t *ret, void *ward) - -> decltype(pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward)) -{ - return pyopencl_outarg(nanny_event, ret, clReleaseEvent, ward); -} - -// }}} - -#endif diff --git a/src/c_wrapper/function.h b/src/c_wrapper/function.h deleted file mode 100644 index 5d1a604c309e05d58b0e361971470d8db3c49571..0000000000000000000000000000000000000000 --- a/src/c_wrapper/function.h +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include - -#ifndef __PYOPENCL_FUNCTION_H -#define __PYOPENCL_FUNCTION_H - -#if defined __GNUC__ && __GNUC__ > 3 -#define PYOPENCL_INLINE inline __attribute__((__always_inline__)) -#else -#define PYOPENCL_INLINE inline -#endif - -template -using rm_ref_t = typename std::remove_reference::type; -template -using rm_const_t = typename std::remove_const::type; -template -using enable_if_t = typename std::enable_if::type; - -template -struct seq { -}; - -template -struct gens : gens { -}; - -template -struct gens<0, S...> { - typedef seq type; -}; - -template -static PYOPENCL_INLINE auto -_call_func(Function func, seq, std::tuple &args) - -> decltype(func(std::forward(std::get(args))...)) -{ - return func(static_cast(std::get(args))...); -} - -template -static PYOPENCL_INLINE auto -call_tuple(Function &&func, T &&args) - -> decltype(_call_func(std::forward(func), - typename gens::value>::type(), - args)) -{ - return _call_func(std::forward(func), - typename gens::value>::type(), args); -} - -template class Convert, typename... 
Types> -using _ArgPackBase = std::tuple::type>...>; - -template class Convert, typename... Types> -class ArgPack : public _ArgPackBase { -public: - typedef _ArgPackBase tuple_base; -private: - template - static PYOPENCL_INLINE std::tuple - ensure_tuple(T &&v) - { - return std::tuple(std::forward(v)); - } - template - static PYOPENCL_INLINE std::tuple - ensure_tuple(std::tuple &&t) - { - return t; - } - - template - using ArgConvert = Convert >; - template class Getter, int... S> - PYOPENCL_INLINE auto - __get(seq) -#ifndef _MSC_VER - -> decltype(std::tuple_cat( - ensure_tuple(Getter >::get( - std::get(*(tuple_base*)this)))...)) -#endif - { - return std::tuple_cat( - ensure_tuple(Getter >::get( - std::get(*(tuple_base*)this)))...); - } -public: - template - ArgPack(Types2&&... arg_orig) - : tuple_base(ArgConvert >(arg_orig)...) - { - } - ArgPack(ArgPack &&other) - : tuple_base(static_cast(other)) - { - } - // GCC Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57543 - template class Getter> - PYOPENCL_INLINE auto - get() -> decltype(this->__get( - typename gens::type())) - { - return __get(typename gens::type()); - } - template class Getter, typename Func> - PYOPENCL_INLINE auto - call(Func func) -> decltype(call_tuple(func, this->get())) - { - return call_tuple(func, this->get()); - } -}; - -template class Convert, typename... Types> -static PYOPENCL_INLINE ArgPack...> -make_argpack(Types&&... 
args) -{ - return ArgPack...>(std::forward(args)...); -} - -#endif diff --git a/src/c_wrapper/gl_obj.cpp b/src/c_wrapper/gl_obj.cpp deleted file mode 100644 index bd7edf31d8ce772adae21047ab34e3eb925f1482..0000000000000000000000000000000000000000 --- a/src/c_wrapper/gl_obj.cpp +++ /dev/null @@ -1,155 +0,0 @@ -#include "gl_obj.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "clhelper.h" - -#ifdef HAVE_GL - -template void print_clobj(std::ostream&, const gl_buffer*); -template void print_clobj(std::ostream&, - const gl_renderbuffer*); - -generic_info -gl_texture::get_gl_texture_info(cl_gl_texture_info param_name) const -{ - switch (param_name) { - case CL_GL_TEXTURE_TARGET: - return pyopencl_get_int_info(GLenum, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_GL_MIPMAP_LEVEL: - return pyopencl_get_int_info(GLint, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); - } -} - -typedef cl_int (CL_API_CALL *clEnqueueGLObjectFunc)(cl_command_queue, cl_uint, - const cl_mem*, cl_uint, - const cl_event*, cl_event*); - -static PYOPENCL_INLINE void -enqueue_gl_objects(clEnqueueGLObjectFunc func, const char *name, - clobj_t *evt, command_queue *cq, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, - uint32_t num_wait_for) -{ - const auto _wait_for = buf_from_class(wait_for, num_wait_for); - const auto _mem_objs = buf_from_class( - mem_objects, num_mem_objects); - call_guarded(func, name, cq, _mem_objs, _wait_for, event_out(evt)); -} -#define enqueue_gl_objects(what, ...) 
\ - enqueue_gl_objects(clEnqueue##what##GLObjects, \ - "clEnqueue" #what "GLObjects", __VA_ARGS__) - -// c wrapper - -error* -create_from_gl_texture(clobj_t *ptr, clobj_t _ctx, cl_mem_flags flags, - GLenum texture_target, GLint miplevel, - GLuint texture) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLTexture, - ctx, flags, texture_target, miplevel, texture); - *ptr = pyopencl_convert_obj(gl_texture, clReleaseMemObject, mem); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateFromGLTexture, "CL 1.1") -#endif -} - -error* -create_from_gl_buffer(clobj_t *ptr, clobj_t _ctx, - cl_mem_flags flags, GLuint bufobj) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLBuffer, - ctx, flags, bufobj); - *ptr = pyopencl_convert_obj(gl_buffer, clReleaseMemObject, mem); - }); -} - -error* -create_from_gl_renderbuffer(clobj_t *ptr, clobj_t _ctx, - cl_mem_flags flags, GLuint bufobj) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_mem mem = pyopencl_call_guarded(clCreateFromGLRenderbuffer, - ctx, flags, bufobj); - *ptr = pyopencl_convert_obj(gl_renderbuffer, - clReleaseMemObject, mem); - }); -} - -error* -enqueue_acquire_gl_objects(clobj_t *evt, clobj_t queue, - const clobj_t *mem_objects, - uint32_t num_mem_objects, - const clobj_t *wait_for, uint32_t num_wait_for) -{ - return c_handle_error([&] { - enqueue_gl_objects( - Acquire, evt, static_cast(queue), - mem_objects, num_mem_objects, wait_for, num_wait_for); - }); -} - -error* -enqueue_release_gl_objects(clobj_t *evt, clobj_t queue, - const clobj_t *mem_objects, - uint32_t num_mem_objects, - const clobj_t *wait_for, uint32_t num_wait_for) -{ - return c_handle_error([&] { - enqueue_gl_objects( - Release, evt, static_cast(queue), - mem_objects, num_mem_objects, wait_for, num_wait_for); - }); -} - -error* -get_gl_object_info(clobj_t mem, 
cl_gl_object_type *otype, GLuint *gl_name) -{ - auto globj = static_cast(mem); - return c_handle_error([&] { - pyopencl_call_guarded(clGetGLObjectInfo, globj, buf_arg(*otype), - buf_arg(*gl_name)); - }); -} - -#endif - -int -have_gl() -{ -#ifdef HAVE_GL - return 1; -#else - return 0; -#endif -} - -cl_context_properties -get_apple_cgl_share_group() -{ -#if (defined(__APPLE__) && !defined(PYOPENCL_APPLE_USE_CL_H)) - #ifdef HAVE_GL - CGLContextObj kCGLContext = CGLGetCurrentContext(); - CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); - - return (cl_context_properties)kCGLShareGroup; - #else - throw clerror("get_apple_cgl_share_group unavailable: " - "GL interop not compiled", - CL_INVALID_VALUE); - #endif -#else - throw clerror("get_apple_cgl_share_group unavailable: non-Apple platform", - CL_INVALID_VALUE); -#endif /* __APPLE__ */ -} diff --git a/src/c_wrapper/gl_obj.h b/src/c_wrapper/gl_obj.h deleted file mode 100644 index 9f47e19b2dab3f93b35f0cd0c65f39471339d6f4..0000000000000000000000000000000000000000 --- a/src/c_wrapper/gl_obj.h +++ /dev/null @@ -1,46 +0,0 @@ -#include "image.h" - -#ifndef __PYOPENCL_GL_OBJ_H -#define __PYOPENCL_GL_OBJ_H - -#ifdef HAVE_GL - -// {{{ gl interop - -class gl_buffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(GL_BUFFER); - PYOPENCL_INLINE - gl_buffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} -}; - -class gl_renderbuffer : public memory_object { -public: - PYOPENCL_DEF_CL_CLASS(GL_RENDERBUFFER); - PYOPENCL_INLINE - gl_renderbuffer(cl_mem mem, bool retain) - : memory_object(mem, retain) - {} -}; - -extern template void print_clobj(std::ostream&, const gl_buffer*); -extern template void print_clobj(std::ostream&, - const gl_renderbuffer*); - -class gl_texture : public image { - public: - PYOPENCL_INLINE - gl_texture(cl_mem mem, bool retain) - : image(mem, retain) - {} - PYOPENCL_USE_RESULT generic_info - get_gl_texture_info(cl_gl_texture_info param_name) const; -}; - -// }}} - -#endif - 
-#endif diff --git a/src/c_wrapper/image.cpp b/src/c_wrapper/image.cpp deleted file mode 100644 index 6f571f3208d13f3fd80e22db604458f7a7ca2617..0000000000000000000000000000000000000000 --- a/src/c_wrapper/image.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include "image.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "buffer.h" - -template void print_clobj(std::ostream&, const image*); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE image* -new_image(cl_mem mem, const cl_image_format *fmt) -{ - return pyopencl_convert_obj(image, clReleaseMemObject, mem, fmt); -} - -generic_info -image::get_image_info(cl_image_info param) const -{ - switch (param) { - case CL_IMAGE_FORMAT: - return pyopencl_get_int_info(cl_image_format, Image, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_IMAGE_ELEMENT_SIZE: - case CL_IMAGE_ROW_PITCH: - case CL_IMAGE_SLICE_PITCH: - case CL_IMAGE_WIDTH: - case CL_IMAGE_HEIGHT: - case CL_IMAGE_DEPTH: -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_IMAGE_ARRAY_SIZE: -#endif - return pyopencl_get_int_info(size_t, Image, PYOPENCL_CL_CASTABLE_THIS, param); - -#if PYOPENCL_CL_VERSION >= 0x1020 - // TODO: - // case CL_IMAGE_BUFFER: - // { - // cl_mem param_value; - // PYOPENCL_CALL_GUARDED(clGetImageInfo, (this, param, sizeof(param_value), ¶m_value, 0)); - // if (param_value == 0) - // { - // // no associated memory object? no problem. 
- // return py::object(); - // } - // return create_mem_object_wrapper(param_value); - // } - case CL_IMAGE_NUM_MIP_LEVELS: - case CL_IMAGE_NUM_SAMPLES: - return pyopencl_get_int_info(cl_uint, Image, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Image.get_image_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Image -error* -create_image_2d(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t pitch, void *buf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage2D, ctx, flags, fmt, - width, height, pitch, buf); - *img = new_image(mem, fmt); - }); -} - -error* -create_image_3d(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t depth, size_t pitch_x, size_t pitch_y, void *buf) -{ - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage3D, ctx, flags, fmt, - width, height, depth, pitch_x, - pitch_y, buf); - *img = new_image(mem, fmt); - }); -} - - -error* -create_image_from_desc(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, cl_image_desc *desc, void *buf) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto mem = pyopencl_call_guarded(clCreateImage, ctx, flags, fmt, - desc, buf); - *img = new_image(mem, fmt); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateImage, "CL 1.1 and below") -#endif -} - - -error* -image__get_image_info(clobj_t _img, cl_image_info param, generic_info *out) -{ - auto img = static_cast(_img); - return c_handle_error([&] { - *out = img->get_image_info(param); - }); -} - -type_t -image__get_fill_type(clobj_t img) -{ - return static_cast(img)->get_fill_type(); -} - -error* -enqueue_read_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const size_t *_orig, size_t orig_l, - const size_t *_reg, 
size_t reg_l, void *buf, - size_t row_pitch, size_t slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueReadImage, queue, img, bool(block), - orig, reg, row_pitch, slice_pitch, buf, - wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* -enqueue_copy_image(clobj_t *evt, clobj_t _queue, clobj_t _src, clobj_t _dst, - const size_t *_src_orig, size_t src_orig_l, - const size_t *_dst_orig, size_t dst_orig_l, - const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - ConstBuffer src_orig(_src_orig, src_orig_l); - ConstBuffer dst_orig(_dst_orig, dst_orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyImage, queue, src, dst, src_orig, - dst_orig, reg, wait_for, event_out(evt)); - }); -} - -error* -enqueue_write_image(clobj_t *evt, clobj_t _queue, clobj_t _mem, - const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, - const void *buf, size_t row_pitch, size_t slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block, void *pyobj) -{ - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueWriteImage, queue, img, bool(block), - orig, reg, row_pitch, slice_pitch, buf, - wait_for, nanny_event_out(evt, pyobj)); - }); -} - -error* 
-enqueue_fill_image(clobj_t *evt, clobj_t _queue, clobj_t mem, - const void *color, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - // TODO debug color - auto queue = static_cast(_queue); - auto img = static_cast(mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueFillImage, queue, img, color, orig, - reg, wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED(clEnqueueFillImage, "CL 1.1 and below") -#endif -} - -// {{{ image transfers - -error* -enqueue_copy_image_to_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, size_t offset, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyImageToBuffer, queue, src, dst, - orig, reg, offset, wait_for, event_out(evt)); - }); -} - -error* -enqueue_copy_buffer_to_image(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, size_t offset, const size_t *_orig, - size_t orig_l, const size_t *_reg, size_t reg_l, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto src = static_cast(_src); - auto dst = static_cast(_dst); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueCopyBufferToImage, queue, src, dst, - offset, orig, reg, wait_for, event_out(evt)); 
- }); -} - -// }}} diff --git a/src/c_wrapper/image.h b/src/c_wrapper/image.h deleted file mode 100644 index 7d29909c9d30896ae915cb32138561ab115f0950..0000000000000000000000000000000000000000 --- a/src/c_wrapper/image.h +++ /dev/null @@ -1,50 +0,0 @@ -#include "memory_object.h" -#include "clhelper.h" - -#ifndef __PYOPENCL_IMAGE_H -#define __PYOPENCL_IMAGE_H - -// {{{ image - -class image : public memory_object { -private: - cl_image_format m_format; -public: - PYOPENCL_DEF_CL_CLASS(IMAGE); - PYOPENCL_INLINE - image(cl_mem mem, bool retain, const cl_image_format *fmt=0) - : memory_object(mem, retain), m_format(fmt ? *fmt : cl_image_format()) - {} - PYOPENCL_INLINE const cl_image_format& - format() - { - if (!m_format.image_channel_data_type) { - pyopencl_call_guarded(clGetImageInfo, PYOPENCL_CL_CASTABLE_THIS, CL_IMAGE_FORMAT, - size_arg(m_format), nullptr); - } - return m_format; - } - PYOPENCL_USE_RESULT generic_info get_image_info(cl_image_info param) const; - PYOPENCL_INLINE type_t - get_fill_type() - { - switch (format().image_channel_data_type) { - case CL_SIGNED_INT8: - case CL_SIGNED_INT16: - case CL_SIGNED_INT32: - return TYPE_INT; - case CL_UNSIGNED_INT8: - case CL_UNSIGNED_INT16: - case CL_UNSIGNED_INT32: - return TYPE_UINT; - default: - return TYPE_FLOAT; - } - } -}; - -extern template void print_clobj(std::ostream&, const image*); - -// }}} - -#endif diff --git a/src/c_wrapper/kernel.cpp b/src/c_wrapper/kernel.cpp deleted file mode 100644 index 817e10619727ae7d060c40d3096b637ec5923629..0000000000000000000000000000000000000000 --- a/src/c_wrapper/kernel.cpp +++ /dev/null @@ -1,213 +0,0 @@ -#include "kernel.h" -#include "context.h" -#include "device.h" -#include "program.h" -#include "memory_object.h" -#include "sampler.h" -#include "command_queue.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_kernel&, bool); -template void print_clobj(std::ostream&, const kernel*); -template void 
print_buf(std::ostream&, const cl_kernel*, - size_t, ArgType, bool, bool); - -kernel::~kernel() -{ - pyopencl_call_guarded_cleanup(clReleaseKernel, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -kernel::get_info(cl_uint param) const -{ - switch ((cl_kernel_info)param) { - case CL_KERNEL_FUNCTION_NAME: - return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_NUM_ARGS: - case CL_KERNEL_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_CONTEXT: - return pyopencl_get_opaque_info(context, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_KERNEL_PROGRAM: - return pyopencl_get_opaque_info(program, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_KERNEL_ATTRIBUTES: - return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Kernel.get_info", CL_INVALID_VALUE); - } -} - -generic_info -kernel::get_work_group_info(cl_kernel_work_group_info param, - const device *dev) const -{ - switch (param) { -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: -#endif - case CL_KERNEL_WORK_GROUP_SIZE: - return pyopencl_get_int_info(size_t, KernelWorkGroup, PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: - return pyopencl_get_array_info(size_t, KernelWorkGroup, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_KERNEL_LOCAL_MEM_SIZE: -#if PYOPENCL_CL_VERSION >= 0x1010 - case CL_KERNEL_PRIVATE_MEM_SIZE: -#endif - return pyopencl_get_int_info(cl_ulong, KernelWorkGroup, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - default: - throw clerror("Kernel.get_work_group_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -PYOPENCL_USE_RESULT generic_info -kernel::get_arg_info(cl_uint idx, cl_kernel_arg_info param) const -{ - switch (param) { - case CL_KERNEL_ARG_ADDRESS_QUALIFIER: - return 
pyopencl_get_int_info(cl_kernel_arg_address_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case CL_KERNEL_ARG_ACCESS_QUALIFIER: - return pyopencl_get_int_info(cl_kernel_arg_access_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case CL_KERNEL_ARG_TYPE_QUALIFIER: - return pyopencl_get_int_info(cl_kernel_arg_type_qualifier, - KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - case CL_KERNEL_ARG_TYPE_NAME: - case CL_KERNEL_ARG_NAME: - return pyopencl_get_str_info(KernelArg, PYOPENCL_CL_CASTABLE_THIS, idx, param); - default: - throw clerror("Kernel.get_arg_info", CL_INVALID_VALUE); - } -} -#endif - -// c wrapper - -// Kernel -error* -create_kernel(clobj_t *knl, clobj_t _prog, const char *name) -{ - auto prog = static_cast(_prog); - return c_handle_error([&] { - *knl = new kernel(pyopencl_call_guarded(clCreateKernel, prog, - name), false); - }); -} - -error* -kernel__set_arg_null(clobj_t _knl, cl_uint arg_index) -{ - auto knl = static_cast(_knl); - return c_handle_error([&] { - const cl_mem m = 0; - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, size_arg(m)); - }); -} - -error* -kernel__set_arg_mem(clobj_t _knl, cl_uint arg_index, clobj_t _mem) -{ - auto knl = static_cast(_knl); - auto mem = static_cast(_mem); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(mem->data())); - }); -} - -error* -kernel__set_arg_sampler(clobj_t _knl, cl_uint arg_index, clobj_t _samp) -{ - auto knl = static_cast(_knl); - auto samp = static_cast(_samp); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(samp->data())); - }); -} - -error* -kernel__set_arg_buf(clobj_t _knl, cl_uint arg_index, - const void *buffer, size_t size) -{ - auto knl = static_cast(_knl); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArg, knl, arg_index, - size_arg(buffer, size)); - }); -} - -error* -kernel__set_arg_svm_pointer(clobj_t _knl, cl_uint arg_index, void 
*value) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto knl = static_cast(_knl); - return c_handle_error([&] { - pyopencl_call_guarded(clSetKernelArgSVMPointer, knl, arg_index, value); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clSetKernelArgSVMPointer, "CL 2.0") -#endif -} - -error* -kernel__get_work_group_info(clobj_t _knl, cl_kernel_work_group_info param, - clobj_t _dev, generic_info *out) -{ - auto knl = static_cast(_knl); - auto dev = static_cast(_dev); - return c_handle_error([&] { - *out = knl->get_work_group_info(param, dev); - }); -} - -error* -kernel__get_arg_info(clobj_t _knl, cl_uint idx, cl_kernel_arg_info param, - generic_info *out) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto knl = static_cast(_knl); - return c_handle_error([&] { - *out = knl->get_arg_info(idx, param); - }); -#else - PYOPENCL_UNSUPPORTED(clKernelGetArgInfo, "CL 1.1 and below") -#endif -} - -error* -enqueue_nd_range_kernel(clobj_t *evt, clobj_t _queue, clobj_t _knl, - cl_uint work_dim, const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto knl = static_cast(_knl); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueNDRangeKernel, queue, knl, work_dim, - global_work_offset, global_work_size, - local_work_size, wait_for, event_out(evt)); - }); -} - -error* -enqueue_task(clobj_t *evt, clobj_t _queue, clobj_t _knl, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ - auto queue = static_cast(_queue); - auto knl = static_cast(_knl); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueTask, queue, knl, wait_for, - event_out(evt)); - }); -} diff --git a/src/c_wrapper/kernel.h b/src/c_wrapper/kernel.h deleted file mode 100644 index 
5db1a0cc53bcd8171cb6973182d5e82de4ddc479..0000000000000000000000000000000000000000 --- a/src/c_wrapper/kernel.h +++ /dev/null @@ -1,44 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_KERNEL_H -#define __PYOPENCL_KERNEL_H - -class device; - -// {{{ kernel - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_kernel&, bool); -extern template void print_buf(std::ostream&, const cl_kernel*, - size_t, ArgType, bool, bool); - -class kernel : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(KERNEL); - PYOPENCL_INLINE - kernel(cl_kernel knl, bool retain) - : clobj(knl) - { - if (retain) { - pyopencl_call_guarded(clRetainKernel, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~kernel(); - generic_info get_info(cl_uint param) const; - - PYOPENCL_USE_RESULT generic_info - get_work_group_info(cl_kernel_work_group_info param, - const device *dev) const; - -#if PYOPENCL_CL_VERSION >= 0x1020 - PYOPENCL_USE_RESULT generic_info - get_arg_info(cl_uint idx, cl_kernel_arg_info param) const; -#endif -}; - -extern template void print_clobj(std::ostream&, const kernel*); - -// }}} - -#endif diff --git a/src/c_wrapper/memory_map.cpp b/src/c_wrapper/memory_map.cpp deleted file mode 100644 index 068274df6d6d4c306682ab372b560817d454037a..0000000000000000000000000000000000000000 --- a/src/c_wrapper/memory_map.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "memory_map.h" -#include "image.h" -#include "buffer.h" -#include "event.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, void *const&, bool); -template void print_buf(std::ostream&, void *const*, - size_t, ArgType, bool, bool); - -memory_map::~memory_map() -{ - if (!m_valid.exchange(false)) - return; - pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, m_queue, - m_mem, PYOPENCL_CL_CASTABLE_THIS, 0, nullptr, nullptr); -} - -void -memory_map::release(clobj_t *evt, const command_queue *queue, - const clobj_t *_wait_for, uint32_t num_wait_for) const -{ - if 
(!m_valid.exchange(false)) { - throw clerror("MemoryMap.release", CL_INVALID_VALUE, - "trying to double-unref mem map"); - } - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - queue = queue ? queue : &m_queue; - pyopencl_call_guarded(clEnqueueUnmapMemObject, queue, - m_mem, PYOPENCL_CL_CASTABLE_THIS, wait_for, event_out(evt)); -} - -generic_info -memory_map::get_info(cl_uint) const -{ - throw clerror("MemoryMap.get_info", CL_INVALID_VALUE); -} - -intptr_t -memory_map::intptr() const -{ - return m_valid ? (intptr_t)data() : 0; -} - -memory_map* -convert_memory_map(clobj_t evt, command_queue *queue, - memory_object *buf, void *res) -{ - try { - return new memory_map(queue, buf, res); - } catch (...) { - delete evt; - pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, queue, - buf, res, 0, nullptr, nullptr); - throw; - } -} - -// c wrapper - -// Memory Map -error* -memory_map__release(clobj_t _map, clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for, clobj_t *evt) -{ - auto map = static_cast(_map); - auto queue = static_cast(_queue); - return c_handle_error([&] { - map->release(evt, queue, _wait_for, num_wait_for); - }); -} - -void* -memory_map__data(clobj_t _map) -{ - return static_cast(_map)->data(); -} - -error* -enqueue_map_image(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, - cl_map_flags flags, const size_t *_orig, size_t orig_l, - const size_t *_reg, size_t reg_l, size_t *row_pitch, - size_t *slice_pitch, const clobj_t *_wait_for, - uint32_t num_wait_for, int block) -{ - auto queue = static_cast(_queue); - auto img = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - ConstBuffer orig(_orig, orig_l); - ConstBuffer reg(_reg, reg_l, 1); - return c_handle_retry_mem_error([&] { - void *res = pyopencl_call_guarded( - clEnqueueMapImage, queue, img, bool(block), flags, orig, - reg, row_pitch, slice_pitch, wait_for, event_out(evt)); - *map = convert_memory_map(*evt, queue, img, res); - }); 
-} - -error* -enqueue_map_buffer(clobj_t *evt, clobj_t *map, clobj_t _queue, clobj_t _mem, - cl_map_flags flags, size_t offset, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block) -{ - auto queue = static_cast(_queue); - auto buf = static_cast(_mem); - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - return c_handle_retry_mem_error([&] { - void *res = pyopencl_call_guarded( - clEnqueueMapBuffer, queue, buf, bool(block), - flags, offset, size, wait_for, event_out(evt)); - *map = convert_memory_map(*evt, queue, buf, res); - }); -} diff --git a/src/c_wrapper/memory_map.h b/src/c_wrapper/memory_map.h deleted file mode 100644 index 65a988a9b0a462bcc9e145eade6d59363eb98279..0000000000000000000000000000000000000000 --- a/src/c_wrapper/memory_map.h +++ /dev/null @@ -1,37 +0,0 @@ -#include "error.h" -#include "command_queue.h" -#include "memory_object.h" - -#ifndef __PYOPENCL_MEMORY_MAP_H -#define __PYOPENCL_MEMORY_MAP_H - -class event; - -// {{{ memory_map - -extern template class clobj; -extern template void print_arg(std::ostream&, void *const&, bool); -extern template void print_buf(std::ostream&, void *const*, - size_t, ArgType, bool, bool); - -class memory_map : public clobj { -private: - mutable volatile std::atomic_bool m_valid; - command_queue m_queue; - memory_object m_mem; -public: - constexpr static const char *class_name = "MEMORY_MAP"; - PYOPENCL_INLINE - memory_map(const command_queue *queue, const memory_object *mem, void *ptr) - : clobj(ptr), m_valid(true), m_queue(*queue), m_mem(*mem) - {} - ~memory_map(); - void release(clobj_t *evt, const command_queue *queue, - const clobj_t *wait_for, uint32_t num_wait_for) const; - generic_info get_info(cl_uint) const; - intptr_t intptr() const; -}; - -// }}} - -#endif diff --git a/src/c_wrapper/memory_object.cpp b/src/c_wrapper/memory_object.cpp deleted file mode 100644 index 6f1ba321307931e6194860b9db292aa2a2fa372a..0000000000000000000000000000000000000000 --- 
a/src/c_wrapper/memory_object.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "memory_object.h" -#include "context.h" -#include "event.h" -#include "command_queue.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_mem&, bool); -template void print_buf(std::ostream&, const cl_mem*, - size_t, ArgType, bool, bool); - -generic_info -memory_object::get_info(cl_uint param_name) const -{ - switch ((cl_mem_info)param_name) { - case CL_MEM_TYPE: - return pyopencl_get_int_info(cl_mem_object_type, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_FLAGS: - return pyopencl_get_int_info(cl_mem_flags, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_SIZE: - return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_HOST_PTR: - throw clerror("MemoryObject.get_info", CL_INVALID_VALUE, - "Use MemoryObject.get_host_array to get " - "host pointer."); - case CL_MEM_MAP_COUNT: - case CL_MEM_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, MemObject, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_MEM_CONTEXT: - return pyopencl_get_opaque_info(context, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); - -#if PYOPENCL_CL_VERSION >= 0x1010 - // TODO - // case CL_MEM_ASSOCIATED_MEMOBJECT: - // { - // cl_mem param_value; - // PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, (this, param_name, sizeof(param_value), ¶m_value, 0)); - // if (param_value == 0) - // { - // // no associated memory object? no problem. 
- // return py::object(); - // } - - // return create_mem_object_wrapper(param_value); - // } - case CL_MEM_OFFSET: - return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_MEM_USES_SVM_POINTER: - return pyopencl_get_int_info(cl_bool, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); -#endif - - default: - throw clerror("MemoryObject.get_info", CL_INVALID_VALUE); - } -} - -memory_object::~memory_object() -{ - if (!m_valid.exchange(false)) - return; - pyopencl_call_guarded_cleanup(clReleaseMemObject, PYOPENCL_CL_CASTABLE_THIS); -} - -// c wrapper - -// Memory Object -error* -memory_object__release(clobj_t obj) -{ - return c_handle_error([&] { - static_cast(obj)->release(); - }); -} - -error* -memory_object__get_host_array(clobj_t _obj, void **hostptr, size_t *size) -{ - auto obj = static_cast(_obj); - return c_handle_error([&] { - cl_mem_flags flags; - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_FLAGS, - size_arg(flags), nullptr); - if (!(flags & CL_MEM_USE_HOST_PTR)) - throw clerror("MemoryObject.get_host_array", CL_INVALID_VALUE, - "Only MemoryObject with USE_HOST_PTR " - "is supported."); - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_HOST_PTR, - size_arg(*hostptr), nullptr); - pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_SIZE, - size_arg(*size), nullptr); - }); -} - -error* -enqueue_migrate_mem_objects(clobj_t *evt, clobj_t _queue, - const clobj_t *_mem_obj, uint32_t num_mem_obj, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - const auto mem_obj = buf_from_class(_mem_obj, num_mem_obj); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded(clEnqueueMigrateMemObjects, queue, - mem_obj, flags, wait_for, event_out(evt)); - }); -#else - 
PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueMigrateMemObjects, "CL 1.2") -#endif -} diff --git a/src/c_wrapper/memory_object.h b/src/c_wrapper/memory_object.h deleted file mode 100644 index 635dc470ef1966d672799e7099d8452d52a56551..0000000000000000000000000000000000000000 --- a/src/c_wrapper/memory_object.h +++ /dev/null @@ -1,56 +0,0 @@ -#include "error.h" -#include - -#ifndef __PYOPENCL_MEMORY_OBJECT_H -#define __PYOPENCL_MEMORY_OBJECT_H - -// {{{ memory_object - -extern template class clobj; -extern template void print_arg(std::ostream&, const cl_mem&, bool); -extern template void print_buf(std::ostream&, const cl_mem*, - size_t, ArgType, bool, bool); - -class memory_object : public clobj { -private: - mutable volatile std::atomic_bool m_valid; -public: - constexpr static const char *class_name = "MEMORY_OBJECT"; - PYOPENCL_INLINE - memory_object(cl_mem mem, bool retain) - : clobj(mem), m_valid(true) - { - if (retain) { - pyopencl_call_guarded(clRetainMemObject, PYOPENCL_CL_CASTABLE_THIS); - } - } - PYOPENCL_INLINE - memory_object(const memory_object &mem) - : memory_object(mem.data(), true) - {} - ~memory_object(); - generic_info get_info(cl_uint param_name) const; - void - release() const - { - if (PYOPENCL_UNLIKELY(!m_valid.exchange(false))) { - throw clerror("MemoryObject.release", CL_INVALID_VALUE, - "trying to double-unref mem object"); - } - pyopencl_call_guarded(clReleaseMemObject, PYOPENCL_CL_CASTABLE_THIS); - } -#if 0 - PYOPENCL_USE_RESULT size_t - size() const - { - size_t param_value; - pyopencl_call_guarded(clGetMemObjectInfo, this, CL_MEM_SIZE, - size_arg(param_value), nullptr); - return param_value; - } -#endif -}; - -// }}} - -#endif diff --git a/src/c_wrapper/mingw-std-threads b/src/c_wrapper/mingw-std-threads deleted file mode 160000 index 776ce7faf9368ec9588ee77458799c281cb25737..0000000000000000000000000000000000000000 --- a/src/c_wrapper/mingw-std-threads +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 776ce7faf9368ec9588ee77458799c281cb25737 diff 
--git a/src/c_wrapper/platform.cpp b/src/c_wrapper/platform.cpp deleted file mode 100644 index 21a896b207b56f0d155f6e730651912a9ea04226..0000000000000000000000000000000000000000 --- a/src/c_wrapper/platform.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "platform.h" -#include "device.h" -#include "clhelper.h" - -#include - -template class clobj; -template void print_arg(std::ostream&, - const cl_platform_id&, bool); -template void print_clobj(std::ostream&, const platform*); -template void print_buf(std::ostream&, const cl_platform_id*, - size_t, ArgType, bool, bool); - -generic_info -platform::get_info(cl_uint param_name) const -{ - switch ((cl_platform_info)param_name) { - case CL_PLATFORM_PROFILE: - case CL_PLATFORM_VERSION: - case CL_PLATFORM_NAME: - case CL_PLATFORM_VENDOR: -#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - case CL_PLATFORM_EXTENSIONS: -#endif - return pyopencl_get_str_info(Platform, PYOPENCL_CL_CASTABLE_THIS, param_name); - default: - throw clerror("Platform.get_info", CL_INVALID_VALUE); - } -} - -void -platform::get_version(cl_platform_id plat, int *major, int *minor) -{ - char s_buff[128]; - size_t size; - pyopencl_buf d_buff(0); - char *name = s_buff; - pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, - 0, nullptr, buf_arg(size)); - if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { - d_buff.resize(size); - name = d_buff.get(); - } - pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, - size_arg(name, size), buf_arg(size)); - *major = *minor = -1; - sscanf(name, "OpenCL %d.%d", major, minor); - // Well, hopefully there won't be a negative OpenCL version =) - if (*major < 0 || *minor < 0) { - throw clerror("Platform.get_version", CL_INVALID_VALUE, - "platform returned non-conformant " - "platform version string"); - } -} - -// c wrapper - -error* -get_platforms(clobj_t **_platforms, uint32_t *num_platforms) -{ - return c_handle_error([&] { - *num_platforms = 0; - 
pyopencl_call_guarded(clGetPlatformIDs, 0, nullptr, - buf_arg(*num_platforms)); - pyopencl_buf platforms(*num_platforms); - pyopencl_call_guarded(clGetPlatformIDs, platforms, - buf_arg(*num_platforms)); - *_platforms = buf_to_base(platforms).release(); - }); -} - -error* -platform__get_devices(clobj_t _plat, clobj_t **_devices, - uint32_t *num_devices, cl_device_type devtype) -{ - auto plat = static_cast(_plat); - return c_handle_error([&] { - *num_devices = 0; - try { - pyopencl_call_guarded(clGetDeviceIDs, plat, devtype, 0, nullptr, - buf_arg(*num_devices)); - } catch (const clerror &e) { - if (e.code() != CL_DEVICE_NOT_FOUND) - throw e; - *num_devices = 0; - } - if (*num_devices == 0) { - *_devices = nullptr; - return; - } - pyopencl_buf devices(*num_devices); - pyopencl_call_guarded(clGetDeviceIDs, plat, devtype, devices, - buf_arg(*num_devices)); - *_devices = buf_to_base(devices).release(); - }); -} - -error* -platform__unload_compiler(clobj_t plat) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - return c_handle_error([&] { - pyopencl_call_guarded(clUnloadPlatformCompiler, - static_cast(plat)); - }); -#else - PYOPENCL_UNSUPPORTED(clUnloadPlatformCompiler, "CL 1.1 and below") -#endif -} diff --git a/src/c_wrapper/platform.h b/src/c_wrapper/platform.h deleted file mode 100644 index 1bad5c298aecb8c55b536276d78f3d8e63d400ea..0000000000000000000000000000000000000000 --- a/src/c_wrapper/platform.h +++ /dev/null @@ -1,27 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_PLATFORM_H -#define __PYOPENCL_PLATFORM_H - -// {{{ platform - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_platform_id&, bool); -extern template void print_buf( - std::ostream&, const cl_platform_id*, size_t, ArgType, bool, bool); - -class platform : public clobj { -public: - static void get_version(cl_platform_id plat, int *major, int *minor); - using clobj::clobj; - PYOPENCL_DEF_CL_CLASS(PLATFORM); - - generic_info get_info(cl_uint param_name) const; -}; - 
-extern template void print_clobj(std::ostream&, const platform*); - -// }}} - -#endif diff --git a/src/c_wrapper/program.cpp b/src/c_wrapper/program.cpp deleted file mode 100644 index a0535c06a9d33abdf4cb91d93a87d6141bd7407a..0000000000000000000000000000000000000000 --- a/src/c_wrapper/program.cpp +++ /dev/null @@ -1,269 +0,0 @@ -#include "program.h" -#include "device.h" -#include "context.h" -#include "clhelper.h" -#include "kernel.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_program&, bool); -template void print_clobj(std::ostream&, const program*); -template void print_buf(std::ostream&, const cl_program*, - size_t, ArgType, bool, bool); - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE program* -new_program(cl_program prog, program_kind_type progkind=KND_UNKNOWN) -{ - return pyopencl_convert_obj(program, clReleaseProgram, prog, progkind); -} - -program::~program() -{ - pyopencl_call_guarded_cleanup(clReleaseProgram, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -program::get_info(cl_uint param) const -{ - switch ((cl_program_info)param) { - case CL_PROGRAM_CONTEXT: - return pyopencl_get_opaque_info(context, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_REFERENCE_COUNT: - case CL_PROGRAM_NUM_DEVICES: - return pyopencl_get_int_info(cl_uint, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_DEVICES: - return pyopencl_get_opaque_array_info(device, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_SOURCE: - return pyopencl_get_str_info(Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_BINARY_SIZES: - return pyopencl_get_array_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_BINARIES: { - auto sizes = pyopencl_get_vec_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, - CL_PROGRAM_BINARY_SIZES); - pyopencl_buf result_ptrs(sizes.len()); - for (size_t i = 0;i < sizes.len();i++) { - result_ptrs[i] = (char*)malloc(sizes[i]); - } - try { - 
pyopencl_call_guarded(clGetProgramInfo, PYOPENCL_CL_CASTABLE_THIS, CL_PROGRAM_BINARIES, - sizes.len() * sizeof(char*), - result_ptrs.get(), nullptr); - } catch (...) { - for (size_t i = 0;i < sizes.len();i++) { - free(result_ptrs[i]); - } - } - pyopencl_buf gis(sizes.len()); - for (size_t i = 0;i < sizes.len();i++) { - gis[i] = make_generic_info( - CLASS_NONE, - _copy_str(std::string("char[") + tostring(sizes[i]) + "]"), - true, - result_ptrs[i], - true); - } - return pyopencl_convert_array_info(generic_info, gis); - } - -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_PROGRAM_NUM_KERNELS: - return pyopencl_get_int_info(size_t, Program, PYOPENCL_CL_CASTABLE_THIS, param); - case CL_PROGRAM_KERNEL_NAMES: - return pyopencl_get_str_info(Program, PYOPENCL_CL_CASTABLE_THIS, param); -#endif - default: - throw clerror("Program.get_info", CL_INVALID_VALUE); - } -} - -generic_info -program::get_build_info(const device *dev, cl_program_build_info param) const -{ - switch (param) { - case CL_PROGRAM_BUILD_STATUS: - return pyopencl_get_int_info(cl_build_status, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); - case CL_PROGRAM_BUILD_OPTIONS: - case CL_PROGRAM_BUILD_LOG: - return pyopencl_get_str_info(ProgramBuild, PYOPENCL_CL_CASTABLE_THIS, dev, param); -#if PYOPENCL_CL_VERSION >= 0x1020 - case CL_PROGRAM_BINARY_TYPE: - return pyopencl_get_int_info(cl_program_binary_type, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: - return pyopencl_get_int_info(size_t, ProgramBuild, - PYOPENCL_CL_CASTABLE_THIS, dev, param); -#endif - default: - throw clerror("Program.get_build_info", CL_INVALID_VALUE); - } -} - -#if PYOPENCL_CL_VERSION >= 0x1020 -void -program::compile(const char *opts, const clobj_t *_devs, size_t num_devs, - const clobj_t *_prgs, const char *const *names, - size_t num_hdrs) -{ - const auto devs = buf_from_class(_devs, num_devs); - const auto prgs = 
buf_from_class(_prgs, num_hdrs); - pyopencl_call_guarded(clCompileProgram, PYOPENCL_CL_CASTABLE_THIS, devs, opts, prgs, - buf_arg(names, num_hdrs), nullptr, nullptr); -} -#endif - -pyopencl_buf -program::all_kernels() -{ - cl_uint num_knls; - pyopencl_call_guarded(clCreateKernelsInProgram, PYOPENCL_CL_CASTABLE_THIS, 0, nullptr, - buf_arg(num_knls)); - pyopencl_buf knls(num_knls); - pyopencl_call_guarded(clCreateKernelsInProgram, PYOPENCL_CL_CASTABLE_THIS, knls, - buf_arg(num_knls)); - return buf_to_base(knls, true); -} - -// c wrapper - -// Program -error* -create_program_with_source(clobj_t *prog, clobj_t _ctx, const char *_src) -{ - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - const auto &src = _src; - const size_t length = strlen(src); - cl_program result = pyopencl_call_guarded( - clCreateProgramWithSource, ctx, len_arg(src), buf_arg(length)); - *prog = new_program(result, KND_SOURCE); - }); -} - -error* -create_program_with_il(clobj_t *prog, clobj_t _ctx, void *il, size_t length) -{ -#if PYOPENCL_CL_VERSION >= 0x2010 - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - cl_program result = pyopencl_call_guarded( - clCreateProgramWithIL, ctx, il, length); - *prog = new_program(result, KND_SOURCE); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clCreateProgramWithIL, "CL 2.1") -#endif -} - -error* -create_program_with_binary(clobj_t *prog, clobj_t _ctx, - cl_uint num_devices, const clobj_t *devices, - const unsigned char **binaries, size_t *binary_sizes) -{ - auto ctx = static_cast(_ctx); - const auto devs = buf_from_class(devices, num_devices); - pyopencl_buf binary_statuses(num_devices); - return c_handle_error([&] { - cl_program result = pyopencl_call_guarded( - clCreateProgramWithBinary, ctx, devs, - binary_sizes, binaries, binary_statuses.get()); - // for (cl_uint i = 0; i < num_devices; ++i) - // std::cout << i << ":" << binary_statuses[i] << std::endl; - *prog = new_program(result, KND_BINARY); - }); -} - -error* 
-program__build(clobj_t _prog, const char *options, - cl_uint num_devices, const clobj_t *_devices) -{ - auto prog = static_cast(_prog); - const auto devices = buf_from_class(_devices, num_devices); - return c_handle_error([&] { - pyopencl_call_guarded(clBuildProgram, prog, devices, options, - nullptr, nullptr); - }); -} - -error* -program__kind(clobj_t prog, int *kind) -{ - return c_handle_error([&] { - *kind = static_cast(prog)->kind(); - }); -} - -error* -program__get_build_info(clobj_t _prog, clobj_t _dev, - cl_program_build_info param, generic_info *out) -{ - auto prog = static_cast(_prog); - auto dev = static_cast(_dev); - return c_handle_error([&] { - *out = prog->get_build_info(dev, param); - }); -} - -error* -program__create_with_builtin_kernels(clobj_t *_prg, clobj_t _ctx, - const clobj_t *_devs, uint32_t num_devs, - const char *names) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto devs = buf_from_class(_devs, num_devs); - auto ctx = static_cast(_ctx); - return c_handle_error([&] { - auto prg = pyopencl_call_guarded(clCreateProgramWithBuiltInKernels, - ctx, devs, names); - *_prg = new_program(prg); - }); -#else - PYOPENCL_UNSUPPORTED(clCreateProgramWithBuiltInKernels, "CL 1.1 and below") -#endif -} - -error* -program__compile(clobj_t _prg, const char *opts, const clobj_t *_devs, - size_t num_devs, const clobj_t *_prgs, - const char *const *names, size_t num_hdrs) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - auto prg = static_cast(_prg); - return c_handle_error([&] { - prg->compile(opts, _devs, num_devs, _prgs, names, num_hdrs); - }); -#else - PYOPENCL_UNSUPPORTED(clCompileProgram, "CL 1.1 and below") -#endif -} - -error* -program__link(clobj_t *_prg, clobj_t _ctx, const clobj_t *_prgs, - size_t num_prgs, const char *opts, const clobj_t *_devs, - size_t num_devs) -{ -#if PYOPENCL_CL_VERSION >= 0x1020 - const auto devs = buf_from_class(_devs, num_devs); - const auto prgs = buf_from_class(_prgs, num_prgs); - auto ctx = static_cast(_ctx); - return 
c_handle_error([&] { - auto prg = pyopencl_call_guarded(clLinkProgram, ctx, devs, opts, - prgs, nullptr, nullptr); - *_prg = new_program(prg); - }); -#else - PYOPENCL_UNSUPPORTED(clLinkProgram, "CL 1.1 and below") -#endif -} - -error* -program__all_kernels(clobj_t _prg, clobj_t **_knl, uint32_t *size) -{ - auto prg = static_cast(_prg); - return c_handle_error([&] { - auto knls = prg->all_kernels(); - *size = knls.len(); - *_knl = knls.release(); - }); -} diff --git a/src/c_wrapper/program.h b/src/c_wrapper/program.h deleted file mode 100644 index 63d2fc760141bec68a8a1347e5300cb07ccda41b..0000000000000000000000000000000000000000 --- a/src/c_wrapper/program.h +++ /dev/null @@ -1,58 +0,0 @@ -#include "clhelper.h" - -#ifndef __PYOPENCL_PROGRAM_H -#define __PYOPENCL_PROGRAM_H - -class device; - -// {{{ program - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_program&, bool); -extern template void print_buf(std::ostream&, const cl_program*, - size_t, ArgType, bool, bool); - -class program : public clobj { -private: - program_kind_type m_program_kind; - -public: - PYOPENCL_DEF_CL_CLASS(PROGRAM); - PYOPENCL_INLINE - program(cl_program prog, bool retain, - program_kind_type progkind=KND_UNKNOWN) - : clobj(prog), m_program_kind(progkind) - { - if (retain) { - pyopencl_call_guarded(clRetainProgram, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~program(); - PYOPENCL_USE_RESULT PYOPENCL_INLINE program_kind_type - kind() const - { - return m_program_kind; - } - PYOPENCL_USE_RESULT pyopencl_buf - get_info__devices() const - { - return pyopencl_get_vec_info(cl_device_id, Program, PYOPENCL_CL_CASTABLE_THIS, - CL_PROGRAM_DEVICES); - } - generic_info get_info(cl_uint param_name) const; - PYOPENCL_USE_RESULT generic_info - get_build_info(const device *dev, cl_program_build_info param_name) const; -#if PYOPENCL_CL_VERSION >= 0x1020 - void compile(const char *opts, const clobj_t *_devs, size_t num_devs, - const clobj_t *_prgs, const char *const *names, 
- size_t num_hdrs); -#endif - pyopencl_buf all_kernels(); -}; - -extern template void print_clobj(std::ostream&, const program*); - -// }}} - -#endif diff --git a/src/c_wrapper/pyhelper.cpp b/src/c_wrapper/pyhelper.cpp deleted file mode 100644 index 7397d12b7ddc1238801ded0a269a768da8f37e5a..0000000000000000000000000000000000000000 --- a/src/c_wrapper/pyhelper.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "pyhelper.h" - -namespace py { -WrapFunc gc; -WrapFunc ref; -WrapFunc deref; -WrapFunc call; -} - -void -set_py_funcs(int (*_gc)(), void *(*_ref)(void*), void (*_deref)(void*), - void (*_call)(void*, cl_int)) -{ - py::gc = _gc; - py::ref = _ref; - py::deref = _deref; - py::call = _call; -} diff --git a/src/c_wrapper/pyhelper.h b/src/c_wrapper/pyhelper.h deleted file mode 100644 index 50c08402908aa92107c9ce0f09bad1e4002f8d35..0000000000000000000000000000000000000000 --- a/src/c_wrapper/pyhelper.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __PYOPENCL_PYHELPER_H -#define __PYOPENCL_PYHELPER_H - -#include "wrap_cl.h" -#include "function.h" - -template -class WrapFunc; - -template -class WrapFunc { - typedef Ret (*_FuncType)(Args...); - _FuncType m_func; - static PYOPENCL_INLINE _FuncType - check_func(_FuncType f) - { - return f ? f : ([] (Args...) {return Ret();}); - } -public: - WrapFunc(_FuncType func=nullptr) - : m_func(check_func(func)) - {} - Ret - operator()(Args... 
args) - { - return m_func(std::forward(args)...); - } - WrapFunc& - operator=(_FuncType func) - { - m_func = check_func(func); - return *this; - } -}; - -namespace py { -extern WrapFunc gc; -extern WrapFunc ref; -extern WrapFunc deref; -extern WrapFunc call; -} - -#endif diff --git a/src/c_wrapper/sampler.cpp b/src/c_wrapper/sampler.cpp deleted file mode 100644 index b373c7830f9b1398a7d904b3c153db6b7ad2ab6a..0000000000000000000000000000000000000000 --- a/src/c_wrapper/sampler.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "sampler.h" -#include "context.h" -#include "clhelper.h" - -template class clobj; -template void print_arg(std::ostream&, const cl_sampler&, bool); -template void print_clobj(std::ostream&, const sampler*); -template void print_buf(std::ostream&, const cl_sampler*, - size_t, ArgType, bool, bool); - -sampler::~sampler() -{ - pyopencl_call_guarded_cleanup(clReleaseSampler, PYOPENCL_CL_CASTABLE_THIS); -} - -generic_info -sampler::get_info(cl_uint param_name) const -{ - switch ((cl_sampler_info)param_name) { - case CL_SAMPLER_REFERENCE_COUNT: - return pyopencl_get_int_info(cl_uint, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_CONTEXT: - return pyopencl_get_opaque_info(context, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_ADDRESSING_MODE: - return pyopencl_get_int_info(cl_addressing_mode, Sampler, - PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_FILTER_MODE: - return pyopencl_get_int_info(cl_filter_mode, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - case CL_SAMPLER_NORMALIZED_COORDS: - return pyopencl_get_int_info(cl_bool, Sampler, PYOPENCL_CL_CASTABLE_THIS, param_name); - -#if PYOPENCL_CL_VERSION >= 0x2000 - // TODO: MIP_FILTER_MODE, LOD_MIN, LOD_MAX -#endif - - default: - throw clerror("Sampler.get_info", CL_INVALID_VALUE); - } -} - -// c wrapper - -// Sampler -error* -create_sampler(clobj_t *samp, clobj_t _ctx, int norm_coords, - cl_addressing_mode am, cl_filter_mode fm) -{ - auto ctx = 
static_cast(_ctx); - return c_handle_error([&] { - *samp = new sampler(pyopencl_call_guarded(clCreateSampler, ctx, - norm_coords, am, fm), - false); - }); -} diff --git a/src/c_wrapper/sampler.h b/src/c_wrapper/sampler.h deleted file mode 100644 index 404b82e57e552393dd2e2b005e54b491584ccd3e..0000000000000000000000000000000000000000 --- a/src/c_wrapper/sampler.h +++ /dev/null @@ -1,33 +0,0 @@ -#include "error.h" - -#ifndef __PYOPENCL_SAMPLER_H -#define __PYOPENCL_SAMPLER_H - -// {{{ sampler - -extern template class clobj; -extern template void print_arg(std::ostream&, - const cl_sampler&, bool); -extern template void print_buf(std::ostream&, const cl_sampler*, - size_t, ArgType, bool, bool); - -class sampler : public clobj { -public: - PYOPENCL_DEF_CL_CLASS(SAMPLER); - PYOPENCL_INLINE - sampler(cl_sampler samp, bool retain) - : clobj(samp) - { - if (retain) { - pyopencl_call_guarded(clRetainSampler, PYOPENCL_CL_CASTABLE_THIS); - } - } - ~sampler(); - generic_info get_info(cl_uint param_name) const; -}; - -extern template void print_clobj(std::ostream&, const sampler*); - -// }}} - -#endif diff --git a/src/c_wrapper/svm.cpp b/src/c_wrapper/svm.cpp deleted file mode 100644 index 8452ec99953e3806b0b890220f20c5c46d71a875..0000000000000000000000000000000000000000 --- a/src/c_wrapper/svm.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "context.h" -#include "command_queue.h" -#include "event.h" - -error* -svm_alloc( - clobj_t _ctx, cl_mem_flags flags, size_t size, cl_uint alignment, - void **result) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto ctx = static_cast(_ctx); - return c_handle_retry_mem_error([&] { - *result = clSVMAlloc(ctx->data(), flags, size, alignment); - if (!*result) - throw clerror("clSVMalloc", CL_INVALID_VALUE, - "(allocation failure, unspecified reason)"); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clSVMAlloc, "CL 2.0") -#endif -} - - -error* -svm_free(clobj_t _ctx, void *svm_pointer) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - auto ctx = static_cast(_ctx); 
- // no error returns (?!) - clSVMFree(ctx->data(), svm_pointer); - return nullptr; -#else - PYOPENCL_UNSUPPORTED_BEFORE(clSVMFree, "CL 2.0") -#endif -} - - -error* -enqueue_svm_free( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - void *svm_pointers[], - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMFree, queue, - num_svm_pointers, svm_pointers, - /* pfn_free_func*/ nullptr, - /* user_data */ nullptr, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMFree, "CL 2.0") -#endif -} - - -error* -enqueue_svm_memcpy( - clobj_t *evt, clobj_t _queue, - cl_bool is_blocking, - void *dst_ptr, const void *src_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, void *pyobj) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMemcpy, queue, - is_blocking, - dst_ptr, src_ptr, size, - wait_for, nanny_event_out(evt, pyobj)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMemcpy, "CL 2.0") -#endif -} - - -error* -enqueue_svm_memfill( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const void *pattern, size_t pattern_size, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMemFill, queue, - svm_ptr, - pattern, pattern_size, size, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMemFill, "CL 2.0") -#endif -} - - -error* -enqueue_svm_map( - clobj_t *evt, clobj_t _queue, - 
cl_bool blocking_map, cl_map_flags map_flags, - void *svm_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMap, queue, - blocking_map, map_flags, - svm_ptr, size, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMap, "CL 2.0") -#endif -} - - -error* -enqueue_svm_unmap( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2000 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMUnmap, queue, - svm_ptr, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMUnmap, "CL 2.0") -#endif -} - - -error* -enqueue_svm_migrate_mem( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - const void **svm_pointers, - const size_t *sizes, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for) -{ -#if PYOPENCL_CL_VERSION >= 0x2010 - const auto wait_for = buf_from_class(_wait_for, num_wait_for); - auto queue = static_cast(_queue); - return c_handle_retry_mem_error([&] { - pyopencl_call_guarded( - clEnqueueSVMMigrateMem, queue, - num_svm_pointers, svm_pointers, sizes, flags, - wait_for, event_out(evt)); - }); -#else - PYOPENCL_UNSUPPORTED_BEFORE(clEnqueueSVMMigrateMem, "CL 2.1") -#endif -} diff --git a/src/c_wrapper/svm.h b/src/c_wrapper/svm.h deleted file mode 100644 index c0e39ec47390d543e0ed8e943edaf10522842d33..0000000000000000000000000000000000000000 --- a/src/c_wrapper/svm.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __PYOPENCL_SVM_H -#define __PYOPENCL_SVM_H - -#endif diff --git a/src/c_wrapper/utils.h b/src/c_wrapper/utils.h deleted file mode 
100644 index d1bbb7d06f0779537bb61c953e2aae07e1e0e412..0000000000000000000000000000000000000000 --- a/src/c_wrapper/utils.h +++ /dev/null @@ -1,551 +0,0 @@ -#include "wrap_cl.h" -#include "function.h" -#include "debug.h" - -#include -#include -#include -#include - -#ifndef __PYOPENCL_UTILS_H -#define __PYOPENCL_UTILS_H - -#if (defined(__GNUC__) && (__GNUC__ > 2)) -# define PYOPENCL_EXPECT(exp, var) __builtin_expect(exp, var) -#else -# define PYOPENCL_EXPECT(exp, var) (exp) -#endif - -#define PYOPENCL_LIKELY(x) PYOPENCL_EXPECT(bool(x), true) -#define PYOPENCL_UNLIKELY(x) PYOPENCL_EXPECT(bool(x), false) - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE std::string -tostring(const T& v) -{ - std::ostringstream ostr; - ostr << v; - return ostr.str(); -} - -template -struct CLGenericArgPrinter { - static PYOPENCL_INLINE void - print(std::ostream &stm, T &arg) - { - stm << arg; - } -}; - -PYOPENCL_USE_RESULT static PYOPENCL_INLINE void* -cl_memdup(const void *p, size_t size) -{ - void *res = malloc(size); - memcpy(res, p, size); - return res; -} - -template -PYOPENCL_USE_RESULT static PYOPENCL_INLINE T* -cl_memdup(const T *p) -{ - // Not supported by libstdc++ yet... 
- // static_assert(std::is_trivially_copy_constructible::value); - return static_cast(cl_memdup(static_cast(p), sizeof(T))); -} - -enum class ArgType { - None, - SizeOf, - Length, -}; - -template -struct type_size : std::integral_constant {}; -template -struct type_size, void>::value> > : - std::integral_constant {}; - -template -static PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const T *p, size_t len) -{ - if (len > 1) { - stm << "["; - } - for (size_t i = 0;i < len;i++) { - CLGenericArgPrinter::print(stm, p[i]); - if (i != len - 1) { - stm << ", "; - } - } - if (len > 1) { - stm << "]"; - } -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const char *p, size_t len) -{ - dbg_print_str(stm, p, len); -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, - const unsigned char *p, size_t len) -{ - dbg_print_bytes(stm, p, len); -} - -template<> -PYOPENCL_INLINE void -_print_buf_content(std::ostream &stm, const void *p, size_t len) -{ - dbg_print_bytes(stm, static_cast(p), len); -} - -template -void -print_buf(std::ostream &stm, const T *p, size_t len, - ArgType arg_type, bool content, bool out) -{ - const size_t ele_size = type_size::value; - if (out) { - stm << "*(" << (const void*)p << "): "; - if (p) { - _print_buf_content(stm, p, len); - } else { - stm << "NULL"; - } - } else { - bool need_quote = content || arg_type != ArgType::None; - if (content) { - if (p) { - _print_buf_content(stm, p, len); - stm << " "; - } else { - stm << "NULL "; - } - } - if (need_quote) { - stm << "<"; - } - switch (arg_type) { - case ArgType::SizeOf: - stm << ele_size * len << ", "; - break; - case ArgType::Length: - stm << len << ", "; - break; - default: - break; - } - stm << (const void*)p; - if (need_quote) { - stm << ">"; - } - } -} - -template -void -print_arg(std::ostream &stm, const T &v, bool out) -{ - if (!out) { - stm << (const void*)&v; - } else { - stm << "*(" << (const void*)&v << "): " << v; - } -} 
-extern template void print_buf(std::ostream&, const char*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_int*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_uint*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_long*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, const cl_ulong*, size_t, - ArgType, bool, bool); -extern template void print_buf(std::ostream&, - const cl_image_format*, size_t, - ArgType, bool, bool); - -template<> -struct CLGenericArgPrinter { - static PYOPENCL_INLINE void - print(std::ostream &stm, std::nullptr_t&) - { - stm << (void*)nullptr; - } -}; - -template -struct CLGenericArgPrinter< - T, enable_if_t >::value || - std::is_same >::value> > { - static PYOPENCL_INLINE void - print(std::ostream &stm, const char *str) - { - dbg_print_str(stm, str); - } -}; - -template -class CLArg { -private: - T &m_arg; -public: - CLArg(T &arg) noexcept - : m_arg(arg) - {} - CLArg(CLArg &&other) noexcept - : m_arg(other.m_arg) - {} - PYOPENCL_INLINE T& - convert() noexcept - { - return m_arg; - } - PYOPENCL_INLINE void - print(std::ostream &stm) - { - CLGenericArgPrinter::print(stm, m_arg); - } -}; - -template<> -class CLArg : public CLArg { - cl_bool m_arg; -public: - CLArg(bool arg) noexcept - : CLArg(m_arg), m_arg(arg ? CL_TRUE : CL_FALSE) - {} - CLArg(CLArg &&other) noexcept - : CLArg(bool(other.m_arg)) - {} - PYOPENCL_INLINE void - print(std::ostream &stm) - { - stm << (m_arg ? 
"true" : "false"); - } -}; - -template -class ArgBuffer { -private: - T *m_buf; - size_t m_len; -protected: - PYOPENCL_INLINE void - set(T *buf) noexcept - { - m_buf = buf; - } -public: - typedef T type; - constexpr static ArgType arg_type = AT; - ArgBuffer(T *buf, size_t l) noexcept - : m_buf(buf), m_len(l) - {} - ArgBuffer(ArgBuffer &&other) noexcept - : ArgBuffer(other.m_buf, other.m_len) - {} - PYOPENCL_INLINE rm_const_t* - get() const noexcept - { - return const_cast*>(m_buf); - } - template - PYOPENCL_INLINE T2& - operator[](int i) const - { - return m_buf[i]; - } - PYOPENCL_INLINE size_t - len() const noexcept - { - return m_len; - } -}; - -template -struct _ToArgBuffer { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &buf) - { - return ArgBuffer, AT>(&buf, 1); - } -}; - -template -static PYOPENCL_INLINE auto -buf_arg(T &&buf) -> decltype(_ToArgBuffer::convert(std::forward(buf))) -{ - return _ToArgBuffer::convert(std::forward(buf)); -} - -template -static PYOPENCL_INLINE ArgBuffer -buf_arg(T *buf, size_t l) -{ - return ArgBuffer(buf, l); -} - -template -static PYOPENCL_INLINE auto -size_arg(T&&... buf) - -> decltype(buf_arg(std::forward(buf)...)) -{ - return buf_arg(std::forward(buf)...); -} - -template -static PYOPENCL_INLINE auto -len_arg(T&&... 
buf) - -> decltype(buf_arg(std::forward(buf)...)) -{ - return buf_arg(std::forward(buf)...); -} - -template -struct _ArgBufferConverter; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) -> decltype(buff.get()) - { - return buff.get(); - } -}; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) - -> decltype(std::make_tuple(type_size::value * - buff.len(), buff.get())) - { - return std::make_tuple(type_size::value * - buff.len(), buff.get()); - } -}; - -template -struct _ArgBufferConverter > { - static PYOPENCL_INLINE auto - convert(Buff &buff) -> decltype(std::make_tuple(buff.len(), buff.get())) - { - return std::make_tuple(buff.len(), buff.get()); - } -}; - -template -class CLArg, - Buff>::value> > { -private: - Buff &m_buff; -public: - constexpr static bool is_out = !std::is_const::value; - CLArg(Buff &buff) noexcept - : m_buff(buff) - {} - CLArg(CLArg &&other) noexcept - : m_buff(other.m_buff) - {} - PYOPENCL_INLINE auto - convert() const noexcept - -> decltype(_ArgBufferConverter::convert(m_buff)) - { - return _ArgBufferConverter::convert(m_buff); - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) - { - print_buf(stm, m_buff.get(), m_buff.len(), - Buff::arg_type, out || !is_out, out); - } -}; - -template -class ConstBuffer : public ArgBuffer { -private: - T m_intern_buf[n]; - ConstBuffer(ConstBuffer&&) = delete; - ConstBuffer() = delete; -public: - ConstBuffer(const T *buf, size_t l, T content=0) - : ArgBuffer(buf, n) - { - if (l < n) { - memcpy(m_intern_buf, buf, type_size::value * l); - for (size_t i = l;i < n;i++) { - m_intern_buf[i] = content; - } - this->set(m_intern_buf); - } - } -}; - -struct OutArg { -}; - -template -class CLArg::value> > { -private: - bool m_converted; - bool m_need_cleanup; - T &m_arg; -public: - constexpr static bool is_out = true; - CLArg(T &arg) - : m_converted(false), m_need_cleanup(false), m_arg(arg) - { - } - CLArg(CLArg 
&&other) noexcept - : m_converted(other.m_converted), m_need_cleanup(other.m_need_cleanup), - m_arg(other.m_arg) - { - other.m_need_cleanup = false; - } - PYOPENCL_INLINE auto - convert() -> decltype(m_arg.get()) - { - return m_arg.get(); - } - PYOPENCL_INLINE void - finish(bool converted) noexcept - { - m_need_cleanup = !converted; - } - PYOPENCL_INLINE void - post() - { - m_arg.convert(); - m_converted = true; - } - ~CLArg() - { - if (m_need_cleanup) { - m_arg.cleanup(m_converted); - } - } - PYOPENCL_INLINE void - print(std::ostream &stm, bool out=false) - { - m_arg.print(stm, out); - } -}; - -template -struct _D { - void operator()(T *p) { - free((void*)p); - } -}; - -template -class pyopencl_buf : public std::unique_ptr > { - size_t m_len; -public: - PYOPENCL_INLINE - pyopencl_buf(size_t len=1) - : std::unique_ptr >((T*)(len ? malloc(sizeof(T) * (len + 1)) : - nullptr)), m_len(len) - { - if (len) { - memset((void*)this->get(), 0, (len + 1) * sizeof(T)); - } - } - PYOPENCL_INLINE size_t - len() const - { - return m_len; - } - PYOPENCL_INLINE T& - operator[](int i) - { - return this->get()[i]; - } - PYOPENCL_INLINE const T& - operator[](int i) const - { - return this->get()[i]; - } - PYOPENCL_INLINE void - resize(size_t len) - { - if (len == m_len) - return; - m_len = len; - this->reset((T*)realloc((void*)this->release(), - (len + 1) * sizeof(T))); - } -}; - -template -using pyopencl_buf_ele_t = typename rm_ref_t::element_type; - -template -struct is_pyopencl_buf : std::false_type {}; - -template -struct is_pyopencl_buf< - T, enable_if_t >, - rm_ref_t >::value> > : std::true_type {}; - -template -struct _ToArgBuffer::value && - std::is_const >::value> > { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &&buf) - { - return ArgBuffer, AT>(buf.get(), buf.len()); - } -}; - -template -struct _ToArgBuffer::value && - !std::is_const >::value> > { - static PYOPENCL_INLINE ArgBuffer, AT> - convert(T &&buf) - { - return ArgBuffer, AT>(buf.get(), buf.len()); - } -}; - 
-template -using __pyopencl_buf_arg_type = - rm_ref_t()))>; - -template -class CLArg::value> > - : public CLArg<__pyopencl_buf_arg_type > { - typedef __pyopencl_buf_arg_type BufType; - BufType m_buff; -public: - PYOPENCL_INLINE - CLArg(Buff &buff) noexcept - : CLArg(m_buff), m_buff(len_arg(buff)) - {} - PYOPENCL_INLINE - CLArg(CLArg &&other) noexcept - : CLArg(m_buff), m_buff(std::move(other.m_buff)) - {} -}; - -// FIXME -PYOPENCL_USE_RESULT static PYOPENCL_INLINE char* -_copy_str(const std::string& str) -{ - return strdup(str.c_str()); -} - -#endif diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp deleted file mode 100644 index 1e001eb4eeb938f0b8d3656672084889095b6990..0000000000000000000000000000000000000000 --- a/src/c_wrapper/wrap_cl.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "pyhelper.h" -#include "clhelper.h" -#include "platform.h" -#include "device.h" -#include "context.h" -#include "command_queue.h" -#include "event.h" -#include "memory_object.h" -#include "image.h" -#include "gl_obj.h" -#include "memory_map.h" -#include "buffer.h" -#include "sampler.h" -#include "program.h" -#include "kernel.h" - -template void print_buf(std::ostream&, const char*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_int*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_uint*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_long*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, const cl_ulong*, size_t, - ArgType, bool, bool); -template void print_buf(std::ostream&, - const cl_image_format*, size_t, - ArgType, bool, bool); - -// {{{ c wrapper - -// Generic functions -int -get_cl_version() -{ - return PYOPENCL_CL_VERSION; -} - -void -free_pointer(void *p) -{ - free(p); -} - -void -free_pointer_array(void **p, uint32_t size) -{ - for (uint32_t i = 0;i < size;i++) { - free(p[i]); - } -} - - -intptr_t -clobj__int_ptr(clobj_t obj) -{ - return 
PYOPENCL_LIKELY(obj) ? obj->intptr() : 0l; -} - -static PYOPENCL_INLINE clobj_t -_from_int_ptr(intptr_t ptr, class_t class_, bool retain) -{ - switch(class_) { - case CLASS_PLATFORM: - return clobj_from_int_ptr(ptr, retain); - case CLASS_DEVICE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_KERNEL: - return clobj_from_int_ptr(ptr, retain); - case CLASS_CONTEXT: - return clobj_from_int_ptr(ptr, retain); - case CLASS_COMMAND_QUEUE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_BUFFER: - return clobj_from_int_ptr(ptr, retain); - case CLASS_PROGRAM: - return clobj_from_int_ptr(ptr, retain); - case CLASS_EVENT: - return clobj_from_int_ptr(ptr, retain); - case CLASS_IMAGE: - return clobj_from_int_ptr(ptr, retain); - case CLASS_SAMPLER: - return clobj_from_int_ptr(ptr, retain); -#ifdef HAVE_GL - case CLASS_GL_BUFFER: - return clobj_from_int_ptr(ptr, retain); - case CLASS_GL_RENDERBUFFER: - return clobj_from_int_ptr(ptr, retain); -#endif - default: - throw clerror("unknown class", CL_INVALID_VALUE); - } -} - -error* -clobj__from_int_ptr(clobj_t *out, intptr_t ptr, class_t class_, int retain) -{ - return c_handle_error([&] { - *out = _from_int_ptr(ptr, class_, retain); - }); -} - -error* -clobj__get_info(clobj_t obj, cl_uint param, generic_info *out) -{ - return c_handle_error([&] { - if (PYOPENCL_UNLIKELY(!obj)) { - throw clerror("NULL input", CL_INVALID_VALUE); - } - *out = obj->get_info(param); - }); -} - -void -clobj__delete(clobj_t obj) -{ - delete obj; -} - -// }}} - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl.h b/src/c_wrapper/wrap_cl.h deleted file mode 100644 index 21ff9c086805056e701186adf00070ed1eee48ed..0000000000000000000000000000000000000000 --- a/src/c_wrapper/wrap_cl.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef _WRAP_CL_H -#define _WRAP_CL_H - - -// CL 1.2 undecided: -// clSetPrintfCallback - -// {{{ includes - -#include - -#include "pyopencl_ext.h" - -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS - -#if (defined(__APPLE__) && 
!defined(PYOPENCL_APPLE_USE_CL_H)) - -// {{{ Mac - -#define PYOPENCL_HAVE_EVENT_SET_CALLBACK - -#ifdef HAVE_GL - -#define PYOPENCL_GL_SHARING_VERSION 1 - -#include -#include -#include -#endif -// }}} - -#else - -// {{{ elsewhere - -#if defined(_WIN32) - -// {{{ Windows - -#define NOMINMAX -#include -#define strdup _strdup -#define strcasecmp _stricmp - -#if _MSC_VER >= 1900 /* VS 2015 and higher */ -#define PYOPENCL_HAVE_EVENT_SET_CALLBACK -#endif - -// }}} - -#else - -// {{{ non-Windows - -#include -#define PYOPENCL_HAVE_EVENT_SET_CALLBACK - -// }}} - -#endif - -#ifdef HAVE_GL -#include -#include -#endif - -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) -#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing -#endif - -// }}} - -#endif - -// }}} - - -// {{{ version handling - -#ifdef PYOPENCL_PRETEND_CL_VERSION -#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION -#else - -#if defined(CL_VERSION_2_2) -#define PYOPENCL_CL_VERSION 0x2020 -#elif defined(CL_VERSION_2_1) -#define PYOPENCL_CL_VERSION 0x2010 -#elif defined(CL_VERSION_2_0) -#define PYOPENCL_CL_VERSION 0x2000 -#elif defined(CL_VERSION_1_2) -#define PYOPENCL_CL_VERSION 0x1020 -#elif defined(CL_VERSION_1_1) -#define PYOPENCL_CL_VERSION 0x1010 -#else -#define PYOPENCL_CL_VERSION 0x1000 -#endif - -#endif - -// }}} - -#ifndef CL_VERSION_2_0 -typedef void* CLeglImageKHR; -typedef void* CLeglDisplayKHR; -typedef void* CLeglSyncKHR; -typedef intptr_t cl_egl_image_properties_khr; -typedef cl_bitfield cl_device_svm_capabilities; -typedef cl_bitfield cl_svm_mem_flags; -typedef intptr_t cl_pipe_properties; -typedef cl_uint cl_pipe_info; -typedef cl_bitfield cl_sampler_properties; -typedef cl_uint cl_kernel_exec_info; -#endif - -#ifndef CL_VERSION_1_2 -typedef intptr_t cl_device_partition_property; -typedef cl_uint cl_kernel_arg_info; - -typedef struct _cl_image_desc { - cl_mem_object_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t 
image_row_pitch; - size_t image_slice_pitch; - cl_uint num_mip_levels; - cl_uint num_samples; - cl_mem buffer; -} cl_image_desc; - -typedef cl_bitfield cl_mem_migration_flags; -#endif - -#ifndef CL_VERSION_1_1 -typedef struct _cl_buffer_region { - size_t origin; - size_t size; -} cl_buffer_region; -#endif - -#ifndef cl_ext_migrate_memobject -typedef cl_bitfield cl_mem_migration_flags_ext; -#endif - -struct clbase; -typedef clbase *clobj_t; - -#ifdef __cplusplus -extern "C" { -#endif - -#include "wrap_cl_core.h" - -#ifdef HAVE_GL -#include "wrap_cl_gl_core.h" -#endif - -#ifdef __cplusplus -} -#endif - -#if defined __GNUC__ || defined __GNUG__ -#define PYOPENCL_USE_RESULT __attribute__((warn_unused_result)) -#else -#define PYOPENCL_USE_RESULT -#endif - -#endif - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h deleted file mode 100644 index 184cd001f5157661aa0a70732e06fc327694c32b..0000000000000000000000000000000000000000 --- a/src/c_wrapper/wrap_cl_core.h +++ /dev/null @@ -1,399 +0,0 @@ -// Interface between C and Python - -struct clbase; -typedef struct clbase *clobj_t; - -// {{{ types - -typedef enum { - TYPE_FLOAT, - TYPE_INT, - TYPE_UINT, -} type_t; - -typedef enum { - KND_UNKNOWN, - KND_SOURCE, - KND_BINARY -} program_kind_type; - -typedef struct { - const char *routine; - const char *msg; - cl_int code; - int other; -} error; - -typedef enum { - CLASS_NONE, - CLASS_PLATFORM, - CLASS_DEVICE, - CLASS_KERNEL, - CLASS_CONTEXT, - CLASS_BUFFER, - CLASS_PROGRAM, - CLASS_EVENT, - CLASS_COMMAND_QUEUE, - CLASS_GL_BUFFER, - CLASS_GL_RENDERBUFFER, - CLASS_IMAGE, - CLASS_SAMPLER -} class_t; - -typedef struct { - class_t opaque_class; - const char *type; - bool free_type; - void *value; - bool free_value; -} generic_info; - -// }}} - -// {{{ generic functions - -int get_cl_version(); -void free_pointer(void*); -void free_pointer_array(void**, uint32_t size); -void set_py_funcs(int (*_gc)(), void *(*_ref)(void*), void 
(*_deref)(void*), - void (*_call)(void*, cl_int)); -int have_gl(); - -unsigned bitlog2(unsigned long v); -void populate_constants(void(*add)(const char*, const char*, int64_t value)); -int get_debug(); -void set_debug(int debug); - -// }}} - -// {{{ platform - -error *get_platforms(clobj_t **ptr_platforms, uint32_t *num_platforms); -error *platform__get_devices(clobj_t platform, clobj_t **ptr_devices, - uint32_t *num_devices, cl_device_type devtype); -error *platform__unload_compiler(clobj_t plat); - -// }}} - -// {{{ device -error *device__create_sub_devices(clobj_t _dev, clobj_t **_devs, - uint32_t *num_devices, - const cl_device_partition_property *props); - -// }}} - -// {{{ context - -error *create_context(clobj_t *ctx, const cl_context_properties *props, - cl_uint num_devices, const clobj_t *ptr_devices); -error *create_context_from_type(clobj_t *_ctx, - const cl_context_properties *props, - cl_device_type dev_type); -error *context__get_supported_image_formats(clobj_t context, cl_mem_flags flags, - cl_mem_object_type image_type, - generic_info *out); - -// }}} - -// {{{ command Queue - -error *create_command_queue(clobj_t *queue, clobj_t context, clobj_t device, - cl_command_queue_properties properties); -error *command_queue__finish(clobj_t queue); -error *command_queue__flush(clobj_t queue); - -// }}} - -// {{{ buffer -error *create_buffer(clobj_t *buffer, clobj_t context, cl_mem_flags flags, - size_t size, void *hostbuf); -error *buffer__get_sub_region(clobj_t *_sub_buf, clobj_t _buf, size_t orig, - size_t size, cl_mem_flags flags); - -// }}} - -// {{{ memory object - -error *memory_object__release(clobj_t obj); -error *memory_object__get_host_array(clobj_t, void **hostptr, size_t *size); - -// }}} - -// {{{ memory map - -error *memory_map__release(clobj_t _map, clobj_t _queue, - const clobj_t *_wait_for, uint32_t num_wait_for, - clobj_t *evt); -void *memory_map__data(clobj_t _map); - -// }}} - -// {{{ svm - -error* svm_alloc( - clobj_t _ctx, cl_mem_flags 
flags, size_t size, cl_uint alignment, - void **result); -error* svm_free(clobj_t _ctx, void *svm_pointer); -error* enqueue_svm_free( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - void *svm_pointers[], - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_memcpy( - clobj_t *evt, clobj_t _queue, - cl_bool is_blocking, - void *dst_ptr, const void *src_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for, - void *pyobj); -error* enqueue_svm_memfill( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const void *pattern, size_t pattern_size, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_map( - clobj_t *evt, clobj_t _queue, - cl_bool blocking_map, cl_map_flags map_flags, - void *svm_ptr, size_t size, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_unmap( - clobj_t *evt, clobj_t _queue, - void *svm_ptr, - const clobj_t *_wait_for, uint32_t num_wait_for); -error* enqueue_svm_migrate_mem( - clobj_t *evt, clobj_t _queue, - cl_uint num_svm_pointers, - const void **svm_pointers, - const size_t *sizes, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ program - -error *create_program_with_source(clobj_t *program, clobj_t context, - const char *src); -error* create_program_with_il(clobj_t *prog, clobj_t _ctx, void *il, size_t length); -error *create_program_with_binary(clobj_t *program, clobj_t context, - cl_uint num_devices, const clobj_t *devices, - const unsigned char **binaries, - size_t *binary_sizes); -error *program__build(clobj_t program, const char *options, - cl_uint num_devices, const clobj_t *devices); -error *program__kind(clobj_t program, int *kind); -error *program__get_build_info(clobj_t program, clobj_t device, - cl_program_build_info param, generic_info *out); -error *program__create_with_builtin_kernels(clobj_t *_prg, clobj_t _ctx, - const clobj_t *_devs, - uint32_t num_devs, - const char 
*names); -error *program__compile(clobj_t _prg, const char *opts, const clobj_t *_devs, - size_t num_devs, const clobj_t *_prgs, - const char *const *names, size_t num_hdrs); -error *program__link(clobj_t *_prg, clobj_t _ctx, const clobj_t *_prgs, - size_t num_prgs, const char *opts, - const clobj_t *_devs, size_t num_devs); -error *program__all_kernels(clobj_t _prg, clobj_t **_knl, uint32_t *size); - -// }}} - -// {{{ sampler - -error *create_sampler(clobj_t *sampler, clobj_t context, int norm_coords, - cl_addressing_mode am, cl_filter_mode fm); - -// }}} - -// {{{ kernel - -error *create_kernel(clobj_t *kernel, clobj_t program, const char *name); -error *kernel__set_arg_null(clobj_t kernel, cl_uint arg_index); -error *kernel__set_arg_mem(clobj_t kernel, cl_uint arg_index, clobj_t mem); -error *kernel__set_arg_sampler(clobj_t kernel, cl_uint arg_index, - clobj_t sampler); -error *kernel__set_arg_buf(clobj_t kernel, cl_uint arg_index, - const void *buffer, size_t size); -error *kernel__set_arg_svm_pointer(clobj_t kernel, cl_uint arg_index, void *value); -error *kernel__get_work_group_info(clobj_t kernel, - cl_kernel_work_group_info param, - clobj_t device, generic_info *out); -error *kernel__get_arg_info(clobj_t _knl, cl_uint idx, - cl_kernel_arg_info param, generic_info *out); - -// }}} - -// {{{ image -error *create_image_2d(clobj_t *image, clobj_t context, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t pitch, void *buffer); -error *create_image_3d(clobj_t *image, clobj_t context, cl_mem_flags flags, - cl_image_format *fmt, size_t width, size_t height, - size_t depth, size_t pitch_x, size_t pitch_y, - void *buffer); -error *create_image_from_desc(clobj_t *img, clobj_t _ctx, cl_mem_flags flags, - cl_image_format *fmt, cl_image_desc *desc, - void *buffer); -error *image__get_image_info(clobj_t img, cl_image_info param, - generic_info *out); -type_t image__get_fill_type(clobj_t img); -// }}} - -// {{{ event - -error 
*event__get_profiling_info(clobj_t event, cl_profiling_info param, - generic_info *out); -error *event__wait(clobj_t event); -error *event__set_callback(clobj_t _evt, cl_int type, void *pyobj); -error *wait_for_events(const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ nanny event - -void *nanny_event__get_ward(clobj_t evt); - -// }}} - -// {{{ user event - -error *create_user_event(clobj_t *_evt, clobj_t _ctx); -error *user_event__set_status(clobj_t _evt, cl_int status); - -// }}} - -// {{{ enqueue_* -error *enqueue_nd_range_kernel(clobj_t *event, clobj_t queue, - clobj_t kernel, cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - const clobj_t *wait_for, uint32_t num_wait_for); -error *enqueue_task(clobj_t *_evt, clobj_t _queue, clobj_t _knl, - const clobj_t *_wait_for, uint32_t num_wait_for); - -error *enqueue_marker_with_wait_list(clobj_t *event, clobj_t queue, - const clobj_t *wait_for, - uint32_t num_wait_for); -error *enqueue_barrier_with_wait_list(clobj_t *event, clobj_t queue, - const clobj_t *wait_for, - uint32_t num_wait_for); -error *enqueue_wait_for_events(clobj_t _queue, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_marker(clobj_t *event, clobj_t queue); -error *enqueue_barrier(clobj_t queue); -error *enqueue_migrate_mem_objects(clobj_t *evt, clobj_t _queue, - const clobj_t *_mem_obj, uint32_t, - cl_mem_migration_flags flags, - const clobj_t *_wait_for, uint32_t num_wait_for); - -// }}} - -// {{{ enqueue_*_buffer* - -error *enqueue_read_buffer(clobj_t *event, clobj_t queue, clobj_t mem, - void *buffer, size_t size, size_t device_offset, - const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void *pyobj); -error *enqueue_copy_buffer(clobj_t *event, clobj_t queue, clobj_t src, - clobj_t dst, ptrdiff_t byte_count, - size_t src_offset, size_t dst_offset, - const clobj_t *wait_for, uint32_t num_wait_for); -error 
*enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem, - const void *buffer, size_t size, - size_t device_offset, const clobj_t *wait_for, - uint32_t num_wait_for, int is_blocking, - void *pyobj); -error *enqueue_map_buffer(clobj_t *_evt, clobj_t *mpa, clobj_t _queue, - clobj_t _mem, cl_map_flags flags, size_t offset, - size_t size, const clobj_t *_wait_for, - uint32_t num_wait_for, int block); -error *enqueue_fill_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _mem, - void *pattern, size_t psize, size_t offset, - size_t size, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_read_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buf, const size_t *_buf_orig, - size_t buf_orig_l, const size_t *_host_orig, - size_t host_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_buf_pitches, - size_t buf_pitches_l, - const size_t *_host_pitches, - size_t host_pitches_l, const clobj_t *_wait_for, - uint32_t num_wait_for, int block, void *pyobj); -error *enqueue_write_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _mem, - void *buf, const size_t *_buf_orig, - size_t buf_orig_l, const size_t *_host_orig, - size_t host_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_buf_pitches, - size_t buf_pitches_l, - const size_t *_host_pitches, - size_t host_pitches_l, - const clobj_t *_wait_for, - uint32_t num_wait_for, int block, void *pyobj); -error *enqueue_copy_buffer_rect(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_orig, - size_t src_orig_l, const size_t *_dst_orig, - size_t dst_orig_l, const size_t *_reg, - size_t reg_l, const size_t *_src_pitches, - size_t src_pitches_l, - const size_t *_dst_pitches, - size_t dst_pitches_l, const clobj_t *_wait_for, - uint32_t num_wait_for); - -// }}} - -// {{{ enqueue_*_image* - -error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem, - const size_t *origin, size_t origin_l, - const size_t *region, size_t region_l, - void *buffer, size_t 
row_pitch, size_t slice_pitch, - const clobj_t *wait_for, uint32_t num_wait_for, - int is_blocking, void *pyobj); -error *enqueue_copy_image(clobj_t *_evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_src_origin, - size_t src_origin_l, const size_t *_dst_origin, - size_t dst_origin_l, const size_t *_region, - size_t region_l, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_write_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, - const size_t *origin, size_t origin_l, - const size_t *region, size_t region_l, - const void *buffer, size_t row_pitch, - size_t slice_pitch, const clobj_t *_wait_for, - uint32_t num_wait_for, int is_blocking, - void *pyobj); -error *enqueue_map_image(clobj_t *_evt, clobj_t *map, clobj_t _queue, - clobj_t _mem, cl_map_flags flags, - const size_t *_origin, size_t origin_l, - const size_t *_region, size_t region_l, - size_t *row_pitch, size_t *slice_pitch, - const clobj_t *_wait_for, uint32_t num_wait_for, - int block); -error *enqueue_fill_image(clobj_t *evt, clobj_t _queue, clobj_t mem, - const void *color, const size_t *_origin, - size_t origin_l, const size_t *_region, - size_t region_l, const clobj_t *_wait_for, - uint32_t num_wait_for); -error *enqueue_copy_image_to_buffer(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, const size_t *_orig, size_t, - const size_t *_reg, size_t, size_t offset, - const clobj_t *_wait_for, uint32_t); -error *enqueue_copy_buffer_to_image(clobj_t *evt, clobj_t _queue, clobj_t _src, - clobj_t _dst, size_t offset, - const size_t *_orig, size_t, - const size_t *_reg, size_t, - const clobj_t *_wait_for, uint32_t); - -// }}} - -// {{{ cl object - -intptr_t clobj__int_ptr(clobj_t obj); -error *clobj__get_info(clobj_t obj, cl_uint param, generic_info *out); -void clobj__delete(clobj_t obj); -error *clobj__from_int_ptr(clobj_t *out, intptr_t ptr, class_t, int); - -// }}} - -// vim: foldmethod=marker diff --git a/src/c_wrapper/wrap_cl_gl_core.h 
b/src/c_wrapper/wrap_cl_gl_core.h deleted file mode 100644 index 606d7c1d61c0f4006016f9724b76150c02857d79..0000000000000000000000000000000000000000 --- a/src/c_wrapper/wrap_cl_gl_core.h +++ /dev/null @@ -1,18 +0,0 @@ -// Interface between C and Python for GL related functions - -error* create_from_gl_texture(clobj_t *ptr, clobj_t _ctx, cl_mem_flags flags, - GLenum texture_target, GLint miplevel, - GLuint texture); -error *create_from_gl_buffer(clobj_t *ptr, clobj_t context, - cl_mem_flags flags, GLuint bufobj); -error *create_from_gl_renderbuffer(clobj_t *ptr, clobj_t context, - cl_mem_flags flags, GLuint bufobj); -error *enqueue_acquire_gl_objects( - clobj_t *event, clobj_t queue, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for); -error *enqueue_release_gl_objects( - clobj_t *event, clobj_t queue, const clobj_t *mem_objects, - uint32_t num_mem_objects, const clobj_t *wait_for, uint32_t num_wait_for); -cl_context_properties get_apple_cgl_share_group(); -error *get_gl_object_info(clobj_t mem, cl_gl_object_type *otype, - GLuint *gl_name); diff --git a/src/c_wrapper/wrap_constants.cpp b/src/c_wrapper/wrap_constants.cpp deleted file mode 100644 index 08ed2edea0ca9e28c337127af31d26e819a93631..0000000000000000000000000000000000000000 --- a/src/c_wrapper/wrap_constants.cpp +++ /dev/null @@ -1,827 +0,0 @@ -#include "wrap_cl.h" -#include - -#ifdef CONST -#undef CONST -#endif - -extern "C" -void populate_constants(void(*add)(const char*, const char*, int64_t value)) -{ -#define _ADD_ATTR(TYPE, PREFIX, NAME, SUFFIX, ...) \ - add(TYPE, #NAME, CL_##PREFIX##NAME##SUFFIX) -#define ADD_ATTR(TYPE, PREFIX, NAME, ...) 
\ - _ADD_ATTR(TYPE, PREFIX, NAME, __VA_ARGS__) - - // program_kind - add("program_kind", "UNKNOWN", KND_UNKNOWN); - add("program_kind", "SOURCE", KND_SOURCE); - add("program_kind", "BINARY", KND_BINARY); - - // status_code - ADD_ATTR("status_code", , SUCCESS); - ADD_ATTR("status_code", , DEVICE_NOT_FOUND); - ADD_ATTR("status_code", , DEVICE_NOT_AVAILABLE); -#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - ADD_ATTR("status_code", , COMPILER_NOT_AVAILABLE); -#endif - ADD_ATTR("status_code", , MEM_OBJECT_ALLOCATION_FAILURE); - ADD_ATTR("status_code", , OUT_OF_RESOURCES); - ADD_ATTR("status_code", , OUT_OF_HOST_MEMORY); - ADD_ATTR("status_code", , PROFILING_INFO_NOT_AVAILABLE); - ADD_ATTR("status_code", , MEM_COPY_OVERLAP); - ADD_ATTR("status_code", , IMAGE_FORMAT_MISMATCH); - ADD_ATTR("status_code", , IMAGE_FORMAT_NOT_SUPPORTED); - ADD_ATTR("status_code", , BUILD_PROGRAM_FAILURE); - ADD_ATTR("status_code", , MAP_FAILURE); - - ADD_ATTR("status_code", , INVALID_VALUE); - ADD_ATTR("status_code", , INVALID_DEVICE_TYPE); - ADD_ATTR("status_code", , INVALID_PLATFORM); - ADD_ATTR("status_code", , INVALID_DEVICE); - ADD_ATTR("status_code", , INVALID_CONTEXT); - ADD_ATTR("status_code", , INVALID_QUEUE_PROPERTIES); - ADD_ATTR("status_code", , INVALID_COMMAND_QUEUE); - ADD_ATTR("status_code", , INVALID_HOST_PTR); - ADD_ATTR("status_code", , INVALID_MEM_OBJECT); - ADD_ATTR("status_code", , INVALID_IMAGE_FORMAT_DESCRIPTOR); - ADD_ATTR("status_code", , INVALID_IMAGE_SIZE); - ADD_ATTR("status_code", , INVALID_SAMPLER); - ADD_ATTR("status_code", , INVALID_BINARY); - ADD_ATTR("status_code", , INVALID_BUILD_OPTIONS); - ADD_ATTR("status_code", , INVALID_PROGRAM); - ADD_ATTR("status_code", , INVALID_PROGRAM_EXECUTABLE); - ADD_ATTR("status_code", , INVALID_KERNEL_NAME); - ADD_ATTR("status_code", , INVALID_KERNEL_DEFINITION); - ADD_ATTR("status_code", , INVALID_KERNEL); - ADD_ATTR("status_code", , INVALID_ARG_INDEX); - ADD_ATTR("status_code", , INVALID_ARG_VALUE); - 
ADD_ATTR("status_code", , INVALID_ARG_SIZE); - ADD_ATTR("status_code", , INVALID_KERNEL_ARGS); - ADD_ATTR("status_code", , INVALID_WORK_DIMENSION); - ADD_ATTR("status_code", , INVALID_WORK_GROUP_SIZE); - ADD_ATTR("status_code", , INVALID_WORK_ITEM_SIZE); - ADD_ATTR("status_code", , INVALID_GLOBAL_OFFSET); - ADD_ATTR("status_code", , INVALID_EVENT_WAIT_LIST); - ADD_ATTR("status_code", , INVALID_EVENT); - ADD_ATTR("status_code", , INVALID_OPERATION); - ADD_ATTR("status_code", , INVALID_GL_OBJECT); - ADD_ATTR("status_code", , INVALID_BUFFER_SIZE); - ADD_ATTR("status_code", , INVALID_MIP_LEVEL); - -#if defined(cl_khr_icd) && (cl_khr_icd >= 1) - ADD_ATTR("status_code", , PLATFORM_NOT_FOUND_KHR); -#endif - -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("status_code", , INVALID_GL_SHAREGROUP_REFERENCE_KHR); -#endif - -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("status_code", , MISALIGNED_SUB_BUFFER_OFFSET); - ADD_ATTR("status_code", , EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); - ADD_ATTR("status_code", , INVALID_GLOBAL_WORK_SIZE); -#endif - -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("status_code", , COMPILE_PROGRAM_FAILURE); - ADD_ATTR("status_code", , LINKER_NOT_AVAILABLE); - ADD_ATTR("status_code", , LINK_PROGRAM_FAILURE); - ADD_ATTR("status_code", , DEVICE_PARTITION_FAILED); - ADD_ATTR("status_code", , KERNEL_ARG_INFO_NOT_AVAILABLE); - ADD_ATTR("status_code", , INVALID_IMAGE_DESCRIPTOR); - ADD_ATTR("status_code", , INVALID_COMPILER_OPTIONS); - ADD_ATTR("status_code", , INVALID_LINKER_OPTIONS); - ADD_ATTR("status_code", , INVALID_DEVICE_PARTITION_COUNT); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("status_code", , INVALID_PIPE_SIZE); - ADD_ATTR("status_code", , INVALID_DEVICE_QUEUE); -#endif - - // platform_info - ADD_ATTR("platform_info", PLATFORM_, PROFILE); - ADD_ATTR("platform_info", PLATFORM_, VERSION); - ADD_ATTR("platform_info", PLATFORM_, NAME); - ADD_ATTR("platform_info", PLATFORM_, VENDOR); -#if 
!(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) - ADD_ATTR("platform_info", PLATFORM_, EXTENSIONS); -#endif - - - // device_type - ADD_ATTR("device_type", DEVICE_TYPE_, DEFAULT); - ADD_ATTR("device_type", DEVICE_TYPE_, CPU); - ADD_ATTR("device_type", DEVICE_TYPE_, GPU); - ADD_ATTR("device_type", DEVICE_TYPE_, ACCELERATOR); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_type", DEVICE_TYPE_, CUSTOM); -#endif - ADD_ATTR("device_type", DEVICE_TYPE_, ALL); - - - // device_info - ADD_ATTR("device_info", DEVICE_, TYPE); - ADD_ATTR("device_info", DEVICE_, VENDOR_ID); - ADD_ATTR("device_info", DEVICE_, MAX_COMPUTE_UNITS); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_ITEM_DIMENSIONS); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_GROUP_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_WORK_ITEM_SIZES); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_CHAR); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_SHORT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_INT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_LONG); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_FLOAT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_DOUBLE); - ADD_ATTR("device_info", DEVICE_, MAX_CLOCK_FREQUENCY); - ADD_ATTR("device_info", DEVICE_, ADDRESS_BITS); - ADD_ATTR("device_info", DEVICE_, MAX_READ_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_WRITE_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_MEM_ALLOC_SIZE); - ADD_ATTR("device_info", DEVICE_, IMAGE2D_MAX_WIDTH); - ADD_ATTR("device_info", DEVICE_, IMAGE2D_MAX_HEIGHT); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_WIDTH); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_HEIGHT); - ADD_ATTR("device_info", DEVICE_, IMAGE3D_MAX_DEPTH); - ADD_ATTR("device_info", DEVICE_, IMAGE_SUPPORT); - ADD_ATTR("device_info", DEVICE_, MAX_PARAMETER_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_SAMPLERS); - ADD_ATTR("device_info", DEVICE_, MEM_BASE_ADDR_ALIGN); - 
ADD_ATTR("device_info", DEVICE_, MIN_DATA_TYPE_ALIGN_SIZE); - ADD_ATTR("device_info", DEVICE_, SINGLE_FP_CONFIG); -#ifdef CL_DEVICE_DOUBLE_FP_CONFIG - ADD_ATTR("device_info", DEVICE_, DOUBLE_FP_CONFIG); -#endif -#ifdef CL_DEVICE_HALF_FP_CONFIG - ADD_ATTR("device_info", DEVICE_, HALF_FP_CONFIG); -#endif - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHE_TYPE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHELINE_SIZE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CACHE_SIZE); - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_CONSTANT_BUFFER_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_CONSTANT_ARGS); - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_TYPE); - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_SIZE); - ADD_ATTR("device_info", DEVICE_, ERROR_CORRECTION_SUPPORT); - ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_RESOLUTION); - ADD_ATTR("device_info", DEVICE_, ENDIAN_LITTLE); - ADD_ATTR("device_info", DEVICE_, AVAILABLE); - ADD_ATTR("device_info", DEVICE_, COMPILER_AVAILABLE); - ADD_ATTR("device_info", DEVICE_, EXECUTION_CAPABILITIES); - ADD_ATTR("device_info", DEVICE_, QUEUE_PROPERTIES); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_HOST_PROPERTIES); -#endif - ADD_ATTR("device_info", DEVICE_, NAME); - ADD_ATTR("device_info", DEVICE_, VENDOR); - ADD_ATTR("device_info", , DRIVER_VERSION); - ADD_ATTR("device_info", DEVICE_, VERSION); - ADD_ATTR("device_info", DEVICE_, PROFILE); - ADD_ATTR("device_info", DEVICE_, EXTENSIONS); - ADD_ATTR("device_info", DEVICE_, PLATFORM); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("device_info", DEVICE_, PREFERRED_VECTOR_WIDTH_HALF); - ADD_ATTR("device_info", DEVICE_, HOST_UNIFIED_MEMORY); // deprecated in 2.0 - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_CHAR); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_SHORT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_INT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_LONG); 
- ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_FLOAT); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_DOUBLE); - ADD_ATTR("device_info", DEVICE_, NATIVE_VECTOR_WIDTH_HALF); - ADD_ATTR("device_info", DEVICE_, OPENCL_C_VERSION); -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV - ADD_ATTR("device_info", DEVICE_, COMPUTE_CAPABILITY_MAJOR_NV); - ADD_ATTR("device_info", DEVICE_, COMPUTE_CAPABILITY_MINOR_NV); - ADD_ATTR("device_info", DEVICE_, REGISTERS_PER_BLOCK_NV); - ADD_ATTR("device_info", DEVICE_, WARP_SIZE_NV); - ADD_ATTR("device_info", DEVICE_, GPU_OVERLAP_NV); - ADD_ATTR("device_info", DEVICE_, KERNEL_EXEC_TIMEOUT_NV); - ADD_ATTR("device_info", DEVICE_, INTEGRATED_MEMORY_NV); - // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h -#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV - ADD_ATTR("device_info", DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV); -#endif -#ifdef CL_DEVICE_PCI_BUS_ID_NV - ADD_ATTR("device_info", DEVICE_, PCI_BUS_ID_NV); -#endif -#ifdef CL_DEVICE_PCI_SLOT_ID_NV - ADD_ATTR("device_info", DEVICE_, PCI_SLOT_ID_NV); -#endif -#endif -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD - ADD_ATTR("device_info", DEVICE_, PROFILING_TIMER_OFFSET_AMD); -#endif -#ifdef CL_DEVICE_TOPOLOGY_AMD - ADD_ATTR("device_info", DEVICE_, TOPOLOGY_AMD); -#endif -#ifdef CL_DEVICE_BOARD_NAME_AMD - ADD_ATTR("device_info", DEVICE_, BOARD_NAME_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_FREE_MEMORY_AMD); -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_PER_COMPUTE_UNIT_AMD); -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, SIMD_INSTRUCTION_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, WAVEFRONT_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD - 
ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNELS_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNEL_BANKS_AMD); -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD - ADD_ATTR("device_info", DEVICE_, GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD); -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD); -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD - ADD_ATTR("device_info", DEVICE_, LOCAL_MEM_BANKS_AMD); -#endif - -#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD - ADD_ATTR("device_info", DEVICE_, THREAD_TRACE_SUPPORTED_AMD); -#endif -#ifdef CL_DEVICE_GFXIP_MAJOR_AMD - ADD_ATTR("device_info", DEVICE_, GFXIP_MAJOR_AMD); -#endif -#ifdef CL_DEVICE_GFXIP_MINOR_AMD - ADD_ATTR("device_info", DEVICE_, GFXIP_MINOR_AMD); -#endif -#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD - ADD_ATTR("device_info", DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD); -#endif - -#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT - ADD_ATTR("device_info", DEVICE_, MAX_ATOMIC_COUNTERS_EXT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_info", DEVICE_, LINKER_AVAILABLE); - ADD_ATTR("device_info", DEVICE_, BUILT_IN_KERNELS); - ADD_ATTR("device_info", DEVICE_, IMAGE_MAX_BUFFER_SIZE); - ADD_ATTR("device_info", DEVICE_, IMAGE_MAX_ARRAY_SIZE); - ADD_ATTR("device_info", DEVICE_, PARENT_DEVICE); - ADD_ATTR("device_info", DEVICE_, PARTITION_MAX_SUB_DEVICES); - ADD_ATTR("device_info", DEVICE_, PARTITION_PROPERTIES); - ADD_ATTR("device_info", DEVICE_, PARTITION_AFFINITY_DOMAIN); - ADD_ATTR("device_info", DEVICE_, PARTITION_TYPE); - ADD_ATTR("device_info", DEVICE_, REFERENCE_COUNT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_INTEROP_USER_SYNC); - ADD_ATTR("device_info", DEVICE_, PRINTF_BUFFER_SIZE); -#endif -#ifdef cl_khr_image2d_from_buffer - ADD_ATTR("device_info", DEVICE_, IMAGE_PITCH_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, IMAGE_BASE_ADDRESS_ALIGNMENT); -#endif -#if 
PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("device_info", DEVICE_, MAX_READ_WRITE_IMAGE_ARGS); - ADD_ATTR("device_info", DEVICE_, MAX_GLOBAL_VARIABLE_SIZE); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_PROPERTIES); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_PREFERRED_SIZE); - ADD_ATTR("device_info", DEVICE_, QUEUE_ON_DEVICE_MAX_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_ON_DEVICE_QUEUES); - ADD_ATTR("device_info", DEVICE_, MAX_ON_DEVICE_EVENTS); - ADD_ATTR("device_info", DEVICE_, SVM_CAPABILITIES); - ADD_ATTR("device_info", DEVICE_, GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE); - ADD_ATTR("device_info", DEVICE_, MAX_PIPE_ARGS); - ADD_ATTR("device_info", DEVICE_, PIPE_MAX_ACTIVE_RESERVATIONS); - ADD_ATTR("device_info", DEVICE_, PIPE_MAX_PACKET_SIZE); - ADD_ATTR("device_info", DEVICE_, PREFERRED_PLATFORM_ATOMIC_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_GLOBAL_ATOMIC_ALIGNMENT); - ADD_ATTR("device_info", DEVICE_, PREFERRED_LOCAL_ATOMIC_ALIGNMENT); -#endif -#if PYOPENCL_CL_VERSION >= 0x2010 - ADD_ATTR("device_info", DEVICE_, IL_VERSION); - ADD_ATTR("device_info", DEVICE_, MAX_NUM_SUB_GROUPS); - ADD_ATTR("device_info", DEVICE_, SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS); -#endif - /* cl_intel_advanced_motion_estimation */ -#ifdef CL_DEVICE_ME_VERSION_INTEL - ADD_ATTR("device_info", DEVICE_, ME_VERSION_INTEL); -#endif - - /* cl_qcom_ext_host_ptr */ -#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM - ADD_ATTR("device_info", DEVICE_, EXT_MEM_PADDING_IN_BYTES_QCOM); -#endif -#ifdef CL_DEVICE_PAGE_SIZE_QCOM - ADD_ATTR("device_info", DEVICE_, PAGE_SIZE_QCOM); -#endif - - /* cl_khr_spir */ -#ifdef CL_DEVICE_SPIR_VERSIONS - ADD_ATTR("device_info", DEVICE_, SPIR_VERSIONS); -#endif - - /* cl_altera_device_temperature */ -#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA - ADD_ATTR("device_info", DEVICE_, CORE_TEMPERATURE_ALTERA); -#endif - - /* cl_intel_simultaneous_sharing */ -#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL - ADD_ATTR("device_info", DEVICE_, 
SIMULTANEOUS_INTEROPS_INTEL); -#endif -#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL - ADD_ATTR("device_info", DEVICE_, NUM_SIMULTANEOUS_INTEROPS_INTEL); -#endif - - // device_fp_config - ADD_ATTR("device_fp_config", FP_, DENORM); - ADD_ATTR("device_fp_config", FP_, INF_NAN); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_NEAREST); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_ZERO); - ADD_ATTR("device_fp_config", FP_, ROUND_TO_INF); - ADD_ATTR("device_fp_config", FP_, FMA); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("device_fp_config", FP_, SOFT_FLOAT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_fp_config", FP_, CORRECTLY_ROUNDED_DIVIDE_SQRT); -#endif - - - // device_mem_cache_type - ADD_ATTR("device_mem_cache_type", , NONE); - ADD_ATTR("device_mem_cache_type", , READ_ONLY_CACHE); - ADD_ATTR("device_mem_cache_type", , READ_WRITE_CACHE); - - - // device_local_mem_type - ADD_ATTR("device_local_mem_type", , LOCAL); - ADD_ATTR("device_local_mem_type", , GLOBAL); - - - // device_exec_capabilities - ADD_ATTR("device_exec_capabilities", EXEC_, KERNEL); - ADD_ATTR("device_exec_capabilities", EXEC_, NATIVE_KERNEL); -#ifdef CL_EXEC_IMMEDIATE_EXECUTION_INTEL - ADD_ATTR("device_exec_capabilities", EXEC_, IMMEDIATE_EXECUTION_INTEL); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - // device_svm_capabilities - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, COARSE_GRAIN_BUFFER); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, FINE_GRAIN_BUFFER); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, FINE_GRAIN_SYSTEM); - ADD_ATTR("device_svm_capabilities", DEVICE_SVM_, ATOMICS); -#endif - - - // command_queue_properties - ADD_ATTR("command_queue_properties", QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); - ADD_ATTR("command_queue_properties", QUEUE_, PROFILING_ENABLE); -#ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL - ADD_ATTR("command_queue_properties", QUEUE_, IMMEDIATE_EXECUTION_ENABLE_INTEL); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - 
ADD_ATTR("command_queue_properties", QUEUE_, ON_DEVICE); - ADD_ATTR("command_queue_properties", QUEUE_, ON_DEVICE_DEFAULT); -#endif - - - // context_info - ADD_ATTR("context_info", CONTEXT_, REFERENCE_COUNT); - ADD_ATTR("context_info", CONTEXT_, DEVICES); - ADD_ATTR("context_info", CONTEXT_, PROPERTIES); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("context_info", CONTEXT_, NUM_DEVICES); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("context_info", CONTEXT_, INTEROP_USER_SYNC); -#endif - - - // gl_context_info -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("gl_context_info", , CURRENT_DEVICE_FOR_GL_CONTEXT_KHR); - ADD_ATTR("gl_context_info", , DEVICES_FOR_GL_CONTEXT_KHR); -#endif - - - // context_properties - ADD_ATTR("context_properties", CONTEXT_, PLATFORM); -#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) - ADD_ATTR("context_properties", ,GL_CONTEXT_KHR); - ADD_ATTR("context_properties", ,EGL_DISPLAY_KHR); - ADD_ATTR("context_properties", ,GLX_DISPLAY_KHR); - ADD_ATTR("context_properties", ,WGL_HDC_KHR); - ADD_ATTR("context_properties", ,CGL_SHAREGROUP_KHR); -#endif -#if defined(__APPLE__) && defined(HAVE_GL) && !defined(PYOPENCL_APPLE_USE_CL_H) - ADD_ATTR("context_properties", ,CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE); -#endif /* __APPLE__ */ -#ifdef CL_CONTEXT_OFFLINE_DEVICES_AMD - ADD_ATTR("context_properties", CONTEXT_, OFFLINE_DEVICES_AMD); -#endif - - - // command_queue_info - ADD_ATTR("command_queue_info", QUEUE_, CONTEXT); - ADD_ATTR("command_queue_info", QUEUE_, DEVICE); - ADD_ATTR("command_queue_info", QUEUE_, REFERENCE_COUNT); - ADD_ATTR("command_queue_info", QUEUE_, PROPERTIES); - - - // queue_properties -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("queue_properties", QUEUE_, PROPERTIES); - ADD_ATTR("queue_properties", QUEUE_, SIZE); -#endif - - - // mem_flags - ADD_ATTR("mem_flags", MEM_, READ_WRITE); - ADD_ATTR("mem_flags", MEM_, WRITE_ONLY); - ADD_ATTR("mem_flags", MEM_, READ_ONLY); - 
ADD_ATTR("mem_flags", MEM_, USE_HOST_PTR); - ADD_ATTR("mem_flags", MEM_, ALLOC_HOST_PTR); - ADD_ATTR("mem_flags", MEM_, COPY_HOST_PTR); -#ifdef cl_amd_device_memory_flags - ADD_ATTR("mem_flags", MEM_, USE_PERSISTENT_MEM_AMD); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_flags", MEM_, HOST_WRITE_ONLY); - ADD_ATTR("mem_flags", MEM_, HOST_READ_ONLY); - ADD_ATTR("mem_flags", MEM_, HOST_NO_ACCESS); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_flags", MEM_, KERNEL_READ_AND_WRITE); -#endif - -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("svm_mem_flags", MEM_, READ_WRITE); - ADD_ATTR("svm_mem_flags", MEM_, WRITE_ONLY); - ADD_ATTR("svm_mem_flags", MEM_, READ_ONLY); - ADD_ATTR("svm_mem_flags", MEM_, SVM_FINE_GRAIN_BUFFER); - ADD_ATTR("svm_mem_flags", MEM_, SVM_ATOMICS); -#endif - - - // channel_order - ADD_ATTR("channel_order", , R); - ADD_ATTR("channel_order", , A); - ADD_ATTR("channel_order", , RG); - ADD_ATTR("channel_order", , RA); - ADD_ATTR("channel_order", , RGB); - ADD_ATTR("channel_order", , RGBA); - ADD_ATTR("channel_order", , BGRA); - ADD_ATTR("channel_order", , INTENSITY); - ADD_ATTR("channel_order", , LUMINANCE); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("channel_order", , Rx); - ADD_ATTR("channel_order", , RGx); - ADD_ATTR("channel_order", , RGBx); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("channel_order", , sRGB); - ADD_ATTR("channel_order", , sRGBx); - ADD_ATTR("channel_order", , sRGBA); - ADD_ATTR("channel_order", , sBGRA); - ADD_ATTR("channel_order", , ABGR); -#endif - - - // channel_type - ADD_ATTR("channel_type", , SNORM_INT8); - ADD_ATTR("channel_type", , SNORM_INT16); - ADD_ATTR("channel_type", , UNORM_INT8); - ADD_ATTR("channel_type", , UNORM_INT16); - ADD_ATTR("channel_type", , UNORM_SHORT_565); - ADD_ATTR("channel_type", , UNORM_SHORT_555); - ADD_ATTR("channel_type", , UNORM_INT_101010); - ADD_ATTR("channel_type", , SIGNED_INT8); - ADD_ATTR("channel_type", , SIGNED_INT16); - ADD_ATTR("channel_type", , 
SIGNED_INT32); - ADD_ATTR("channel_type", , UNSIGNED_INT8); - ADD_ATTR("channel_type", , UNSIGNED_INT16); - ADD_ATTR("channel_type", , UNSIGNED_INT32); - ADD_ATTR("channel_type", , HALF_FLOAT); - ADD_ATTR("channel_type", , FLOAT); - - - // mem_object_type - ADD_ATTR("mem_object_type", MEM_OBJECT_, BUFFER); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE2D); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE3D); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE2D_ARRAY); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D_ARRAY); - ADD_ATTR("mem_object_type", MEM_OBJECT_, IMAGE1D_BUFFER); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_object_type", MEM_OBJECT_, PIPE); -#endif - - - // mem_info - ADD_ATTR("mem_info", MEM_, TYPE); - ADD_ATTR("mem_info", MEM_, FLAGS); - ADD_ATTR("mem_info", MEM_, SIZE); - ADD_ATTR("mem_info", MEM_, HOST_PTR); - ADD_ATTR("mem_info", MEM_, MAP_COUNT); - ADD_ATTR("mem_info", MEM_, REFERENCE_COUNT); - ADD_ATTR("mem_info", MEM_, CONTEXT); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("mem_info", MEM_, ASSOCIATED_MEMOBJECT); - ADD_ATTR("mem_info", MEM_, OFFSET); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("mem_info", MEM_, USES_SVM_POINTER); -#endif - - - // image_info - ADD_ATTR("image_info", IMAGE_, FORMAT); - ADD_ATTR("image_info", IMAGE_, ELEMENT_SIZE); - ADD_ATTR("image_info", IMAGE_, ROW_PITCH); - ADD_ATTR("image_info", IMAGE_, SLICE_PITCH); - ADD_ATTR("image_info", IMAGE_, WIDTH); - ADD_ATTR("image_info", IMAGE_, HEIGHT); - ADD_ATTR("image_info", IMAGE_, DEPTH); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("image_info", IMAGE_, ARRAY_SIZE); - ADD_ATTR("image_info", IMAGE_, BUFFER); - ADD_ATTR("image_info", IMAGE_, NUM_MIP_LEVELS); - ADD_ATTR("image_info", IMAGE_, NUM_SAMPLES); -#endif - - - // addressing_mode - ADD_ATTR("addressing_mode", ADDRESS_, NONE); - ADD_ATTR("addressing_mode", ADDRESS_, CLAMP_TO_EDGE); - 
ADD_ATTR("addressing_mode", ADDRESS_, CLAMP); - ADD_ATTR("addressing_mode", ADDRESS_, REPEAT); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("addressing_mode", ADDRESS_, MIRRORED_REPEAT); -#endif - - - // filter_mode - ADD_ATTR("filter_mode", FILTER_, NEAREST); - ADD_ATTR("filter_mode", FILTER_, LINEAR); - - - // sampler_info - ADD_ATTR("sampler_info", SAMPLER_, REFERENCE_COUNT); - ADD_ATTR("sampler_info", SAMPLER_, CONTEXT); - ADD_ATTR("sampler_info", SAMPLER_, NORMALIZED_COORDS); - ADD_ATTR("sampler_info", SAMPLER_, ADDRESSING_MODE); - ADD_ATTR("sampler_info", SAMPLER_, FILTER_MODE); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("sampler_info", SAMPLER_, MIP_FILTER_MODE); - ADD_ATTR("sampler_info", SAMPLER_, LOD_MIN); - ADD_ATTR("sampler_info", SAMPLER_, LOD_MAX); -#endif - - - // map_flags - ADD_ATTR("map_flags", MAP_, READ); - ADD_ATTR("map_flags", MAP_, WRITE); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("map_flags", MAP_, WRITE_INVALIDATE_REGION); -#endif - - - // program_info - ADD_ATTR("program_info", PROGRAM_, REFERENCE_COUNT); - ADD_ATTR("program_info", PROGRAM_, CONTEXT); - ADD_ATTR("program_info", PROGRAM_, NUM_DEVICES); - ADD_ATTR("program_info", PROGRAM_, DEVICES); - ADD_ATTR("program_info", PROGRAM_, SOURCE); - ADD_ATTR("program_info", PROGRAM_, BINARY_SIZES); - ADD_ATTR("program_info", PROGRAM_, BINARIES); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("program_info", PROGRAM_, NUM_KERNELS); - ADD_ATTR("program_info", PROGRAM_, KERNEL_NAMES); -#endif - - - // program_build_info - ADD_ATTR("program_build_info", PROGRAM_BUILD_, STATUS); - ADD_ATTR("program_build_info", PROGRAM_BUILD_, OPTIONS); - ADD_ATTR("program_build_info", PROGRAM_BUILD_, LOG); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("program_build_info", PROGRAM_, BINARY_TYPE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("program_build_info", PROGRAM_BUILD_, GLOBAL_VARIABLE_TOTAL_SIZE); -#endif - - - // program_binary_type -#if PYOPENCL_CL_VERSION >= 0x1020 - 
ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, NONE); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, COMPILED_OBJECT); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, LIBRARY); - ADD_ATTR("program_binary_type", PROGRAM_BINARY_TYPE_, EXECUTABLE); -#endif - - - // kernel_info - ADD_ATTR("kernel_info", KERNEL_, FUNCTION_NAME); - ADD_ATTR("kernel_info", KERNEL_, NUM_ARGS); - ADD_ATTR("kernel_info", KERNEL_, REFERENCE_COUNT); - ADD_ATTR("kernel_info", KERNEL_, CONTEXT); - ADD_ATTR("kernel_info", KERNEL_, PROGRAM); -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_info", KERNEL_, ATTRIBUTES); -#endif - - - // kernel_arg_info -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, ADDRESS_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, ACCESS_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, TYPE_NAME); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, TYPE_QUALIFIER); - ADD_ATTR("kernel_arg_info", KERNEL_ARG_, NAME); -#endif - - - // kernel_arg_address_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, GLOBAL); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, LOCAL); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, CONSTANT); - ADD_ATTR("kernel_arg_address_qualifier", KERNEL_ARG_ADDRESS_, PRIVATE); -#endif - - - // kernel_arg_access_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, READ_ONLY); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, WRITE_ONLY); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, READ_WRITE); - ADD_ATTR("kernel_arg_access_qualifier", KERNEL_ARG_ACCESS_, NONE); -#endif - - - // kernel_arg_type_qualifier -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, NONE); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, CONST); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, 
RESTRICT); - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, VOLATILE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("kernel_arg_type_qualifier", KERNEL_ARG_TYPE_, PIPE); -#endif - - - // kernel_work_group_info - ADD_ATTR("kernel_work_group_info", KERNEL_, WORK_GROUP_SIZE); - ADD_ATTR("kernel_work_group_info", KERNEL_, COMPILE_WORK_GROUP_SIZE); - ADD_ATTR("kernel_work_group_info", KERNEL_, LOCAL_MEM_SIZE); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("kernel_work_group_info", KERNEL_, PREFERRED_WORK_GROUP_SIZE_MULTIPLE); - ADD_ATTR("kernel_work_group_info", KERNEL_, PRIVATE_MEM_SIZE); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("kernel_work_group_info", KERNEL_, GLOBAL_WORK_SIZE); -#endif - - - // event_info - ADD_ATTR("event_info", EVENT_, COMMAND_QUEUE); - ADD_ATTR("event_info", EVENT_, COMMAND_TYPE); - ADD_ATTR("event_info", EVENT_, REFERENCE_COUNT); - ADD_ATTR("event_info", EVENT_, COMMAND_EXECUTION_STATUS); -#if PYOPENCL_CL_VERSION >= 0x1010 - ADD_ATTR("event_info", EVENT_, CONTEXT); -#endif - - - // command_type - ADD_ATTR("command_type", COMMAND_, NDRANGE_KERNEL); - ADD_ATTR("command_type", COMMAND_, TASK); - ADD_ATTR("command_type", COMMAND_, NATIVE_KERNEL); - ADD_ATTR("command_type", COMMAND_, READ_BUFFER); - ADD_ATTR("command_type", COMMAND_, WRITE_BUFFER); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER); - ADD_ATTR("command_type", COMMAND_, READ_IMAGE); - ADD_ATTR("command_type", COMMAND_, WRITE_IMAGE); - ADD_ATTR("command_type", COMMAND_, COPY_IMAGE); - ADD_ATTR("command_type", COMMAND_, COPY_IMAGE_TO_BUFFER); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER_TO_IMAGE); - ADD_ATTR("command_type", COMMAND_, MAP_BUFFER); - ADD_ATTR("command_type", COMMAND_, MAP_IMAGE); - ADD_ATTR("command_type", COMMAND_, UNMAP_MEM_OBJECT); - ADD_ATTR("command_type", COMMAND_, MARKER); - ADD_ATTR("command_type", COMMAND_, ACQUIRE_GL_OBJECTS); - ADD_ATTR("command_type", COMMAND_, RELEASE_GL_OBJECTS); -#if PYOPENCL_CL_VERSION >= 0x1010 - 
ADD_ATTR("command_type", COMMAND_, READ_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, WRITE_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, COPY_BUFFER_RECT); - ADD_ATTR("command_type", COMMAND_, USER); -#endif -#ifdef cl_ext_migrate_memobject - ADD_ATTR("command_type", COMMAND_, MIGRATE_MEM_OBJECT_EXT); -#endif -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("command_type", COMMAND_, BARRIER); - ADD_ATTR("command_type", COMMAND_, MIGRATE_MEM_OBJECTS); - ADD_ATTR("command_type", COMMAND_, FILL_BUFFER); - ADD_ATTR("command_type", COMMAND_, FILL_IMAGE); -#endif -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("command_type", COMMAND_, SVM_FREE); - ADD_ATTR("command_type", COMMAND_, SVM_MEMCPY); - ADD_ATTR("command_type", COMMAND_, SVM_MEMFILL); - ADD_ATTR("command_type", COMMAND_, SVM_MAP); - ADD_ATTR("command_type", COMMAND_, SVM_UNMAP); -#endif - - - // command_execution_status - ADD_ATTR("command_execution_status", , COMPLETE); - ADD_ATTR("command_execution_status", , RUNNING); - ADD_ATTR("command_execution_status", , SUBMITTED); - ADD_ATTR("command_execution_status", , QUEUED); - - - // profiling_info - ADD_ATTR("profiling_info", PROFILING_COMMAND_, QUEUED); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, SUBMIT); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, START); - ADD_ATTR("profiling_info", PROFILING_COMMAND_, END); -#if PYOPENCL_CL_VERSION >= 0x2000 - ADD_ATTR("profiling_info", PROFILING_COMMAND_, COMPLETE); -#endif - - - // mem_migration_flags -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("mem_migration_flags", MIGRATE_MEM_OBJECT_, HOST); - ADD_ATTR("mem_migration_flags", MIGRATE_MEM_OBJECT_, CONTENT_UNDEFINED); -#endif - - - // mem_migration_flags_ext -#ifdef cl_ext_migrate_memobject - ADD_ATTR("mem_migration_flags_ext", MIGRATE_MEM_OBJECT_, HOST, _EXT); - - // As of 2018-07-11, the official headers seem to have dropped this: -#ifdef CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED_EXT - ADD_ATTR("mem_migration_flags_ext", MIGRATE_MEM_OBJECT_, - 
CONTENT_UNDEFINED, _EXT); -#endif - -#endif - - - // device_partition_property -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, EQUALLY); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_COUNTS); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_COUNTS_LIST_END); - ADD_ATTR("device_partition_property", DEVICE_PARTITION_, BY_AFFINITY_DOMAIN); -#endif - - - // device_affinity_domain -#if PYOPENCL_CL_VERSION >= 0x1020 - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, NUMA); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L4_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L3_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L2_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, L1_CACHE); - ADD_ATTR("device_affinity_domain", DEVICE_AFFINITY_DOMAIN_, - NEXT_PARTITIONABLE); -#endif - - -#ifdef HAVE_GL - // gl_object_type - ADD_ATTR("gl_object_type", GL_OBJECT_, BUFFER); - ADD_ATTR("gl_object_type", GL_OBJECT_, TEXTURE2D); - ADD_ATTR("gl_object_type", GL_OBJECT_, TEXTURE3D); - ADD_ATTR("gl_object_type", GL_OBJECT_, RENDERBUFFER); - - - // gl_texture_info - ADD_ATTR("gl_texture_info", GL_, TEXTURE_TARGET); - ADD_ATTR("gl_texture_info", GL_, MIPMAP_LEVEL); -#endif - - - // migrate_mem_object_flags_ext -#ifdef cl_ext_migrate_memobject - ADD_ATTR("migrate_mem_object_flags_ext", MIGRATE_MEM_OBJECT_, HOST, _EXT); -#endif -} diff --git a/src/mempool.hpp b/src/mempool.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b24fcb02f8528b9e06f8011b69849a8a804b939d --- /dev/null +++ b/src/mempool.hpp @@ -0,0 +1,369 @@ +// Abstract memory pool implementation + +#ifndef _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP +#define _AFJDFJSDFSD_PYGPU_HEADER_SEEN_MEMPOOL_HPP + + +#include +#include +#include +#include +#include +#include +#include "wrap_cl.hpp" +#include "bitlog.hpp" + + +namespace PYGPU_PACKAGE +{ + template 
+ inline T signed_left_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x >> -shift_amount; + else + return x << shift_amount; + } + + + + + template + inline T signed_right_shift(T x, signed shift_amount) + { + if (shift_amount < 0) + return x << -shift_amount; + else + return x >> shift_amount; + } + + + + + template + class memory_pool : noncopyable + { + public: + typedef typename Allocator::pointer_type pointer_type; + typedef typename Allocator::size_type size_type; + + private: + typedef uint32_t bin_nr_t; + typedef std::vector bin_t; + + typedef std::map container_t; + container_t m_container; + typedef typename container_t::value_type bin_pair_t; + + std::unique_ptr m_allocator; + + // A held block is one that's been released by the application, but that + // we are keeping around to dish out again. + unsigned m_held_blocks; + + // An active block is one that is in use by the application. + unsigned m_active_blocks; + + bool m_stop_holding; + int m_trace; + + public: + memory_pool(Allocator const &alloc=Allocator()) + : m_allocator(alloc.copy()), + m_held_blocks(0), m_active_blocks(0), m_stop_holding(false), + m_trace(false) + { + if (m_allocator->is_deferred()) + { + PyErr_WarnEx(PyExc_UserWarning, "Memory pools expect non-deferred " + "semantics from their allocators. You passed a deferred " + "allocator, i.e. 
an allocator whose allocations can turn out to " + "be unavailable long after allocation.", 1); + } + } + + virtual ~memory_pool() + { free_held(); } + + static const unsigned mantissa_bits = 2; + static const unsigned mantissa_mask = (1 << mantissa_bits) - 1; + + static bin_nr_t bin_number(size_type size) + { + signed l = bitlog2(size); + size_type shifted = signed_right_shift(size, l-signed(mantissa_bits)); + if (size && (shifted & (1 << mantissa_bits)) == 0) + throw std::runtime_error("memory_pool::bin_number: bitlog2 fault"); + size_type chopped = shifted & mantissa_mask; + return l << mantissa_bits | chopped; + } + + void set_trace(bool flag) + { + if (flag) + ++m_trace; + else + --m_trace; + } + + static size_type alloc_size(bin_nr_t bin) + { + bin_nr_t exponent = bin >> mantissa_bits; + bin_nr_t mantissa = bin & mantissa_mask; + + size_type ones = signed_left_shift(1, + signed(exponent)-signed(mantissa_bits) + ); + if (ones) ones -= 1; + + size_type head = signed_left_shift( + (1<second; + } + else + return it->second; + } + + void inc_held_blocks() + { + if (m_held_blocks == 0) + start_holding_blocks(); + ++m_held_blocks; + } + + void dec_held_blocks() + { + --m_held_blocks; + if (m_held_blocks == 0) + stop_holding_blocks(); + } + + virtual void start_holding_blocks() + { } + + virtual void stop_holding_blocks() + { } + + public: + pointer_type allocate(size_type size) + { + bin_nr_t bin_nr = bin_number(size); + bin_t &bin = get_bin(bin_nr); + + if (bin.size()) + { + if (m_trace) + std::cout + << "[pool] allocation of size " << size << " served from bin " << bin_nr + << " which contained " << bin.size() << " entries" << std::endl; + return pop_block_from_bin(bin, size); + } + + size_type alloc_sz = alloc_size(bin_nr); + + assert(bin_number(alloc_sz) == bin_nr); + + if (m_trace) + std::cout << "[pool] allocation of size " << size << " required new memory" << std::endl; + + try { return get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + 
if (!e.is_out_of_memory()) + throw; + } + + if (m_trace) + std::cout << "[pool] allocation triggered OOM, running GC" << std::endl; + + m_allocator->try_release_blocks(); + if (bin.size()) + return pop_block_from_bin(bin, size); + + if (m_trace) + std::cout << "[pool] allocation still OOM after GC" << std::endl; + + while (try_to_free_memory()) + { + try { return get_from_allocator(alloc_sz); } + catch (PYGPU_PACKAGE::error &e) + { + if (!e.is_out_of_memory()) + throw; + } + } + + throw PYGPU_PACKAGE::error( + "memory_pool::allocate", +#ifdef PYGPU_PYCUDA + CUDA_ERROR_OUT_OF_MEMORY, +#endif +#ifdef PYGPU_PYOPENCL + CL_MEM_OBJECT_ALLOCATION_FAILURE, +#endif + "failed to free memory for allocation"); + } + + void free(pointer_type p, size_type size) + { + --m_active_blocks; + bin_nr_t bin_nr = bin_number(size); + + if (!m_stop_holding) + { + inc_held_blocks(); + get_bin(bin_nr).push_back(p); + + if (m_trace) + std::cout << "[pool] block of size " << size << " returned to bin " + << bin_nr << " which now contains " << get_bin(bin_nr).size() + << " entries" << std::endl; + } + else + m_allocator->free(p); + } + + void free_held() + { + for (bin_pair_t &bin_pair: m_container) + { + bin_t &bin = bin_pair.second; + + while (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + } + } + + assert(m_held_blocks == 0); + } + + void stop_holding() + { + m_stop_holding = true; + free_held(); + } + + unsigned active_blocks() + { return m_active_blocks; } + + unsigned held_blocks() + { return m_held_blocks; } + + bool try_to_free_memory() + { + // free largest stuff first + for (bin_pair_t &bin_pair: reverse(m_container)) + { + bin_t &bin = bin_pair.second; + + if (bin.size()) + { + m_allocator->free(bin.back()); + bin.pop_back(); + + dec_held_blocks(); + + return true; + } + } + + return false; + } + + private: + pointer_type get_from_allocator(size_type alloc_sz) + { + pointer_type result = m_allocator->allocate(alloc_sz); + 
++m_active_blocks; + + return result; + } + + pointer_type pop_block_from_bin(bin_t &bin, size_type size) + { + pointer_type result = bin.back(); + bin.pop_back(); + + dec_held_blocks(); + ++m_active_blocks; + + return result; + } + }; + + + template + class pooled_allocation : public noncopyable + { + public: + typedef Pool pool_type; + typedef typename Pool::pointer_type pointer_type; + typedef typename Pool::size_type size_type; + + private: + std::shared_ptr m_pool; + + pointer_type m_ptr; + size_type m_size; + bool m_valid; + + public: + pooled_allocation(std::shared_ptr p, size_type size) + : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) + { } + + ~pooled_allocation() + { + if (m_valid) + free(); + } + + void free() + { + if (m_valid) + { + m_pool->free(m_ptr, m_size); + m_valid = false; + } + else + throw PYGPU_PACKAGE::error( + "pooled_device_allocation::free", +#ifdef PYGPU_PYCUDA + CUDA_ERROR_INVALID_HANDLE +#endif +#ifdef PYGPU_PYOPENCL + CL_INVALID_VALUE +#endif + ); + } + + pointer_type ptr() const + { return m_ptr; } + + size_type size() const + { return m_size; } + }; +} + + + + +#endif diff --git a/src/numpy_init.hpp b/src/numpy_init.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9d6393f480471d5829b226bef5f0d6d61c6643d8 --- /dev/null +++ b/src/numpy_init.hpp @@ -0,0 +1,35 @@ +#ifndef _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP +#define _FAYHVVAAA_PYOPENCL_HEADER_SEEN_NUMPY_INIT_HPP + + +// #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include + + +namespace +{ + static struct pyublas_array_importer + { + static bool do_import_array() + { +#ifdef PYPY_VERSION + import_array(); +#else + import_array1(false); +#endif + return true; + } + + pyublas_array_importer() + { + if (!do_import_array()) + throw std::runtime_error("numpy failed to initialize"); + } + } _array_importer; +} + + + + +#endif diff --git a/src/c_wrapper/pyopencl_ext.h b/src/pyopencl_ext.h similarity index 100% rename from 
src/c_wrapper/pyopencl_ext.h rename to src/pyopencl_ext.h diff --git a/src/tools.hpp b/src/tools.hpp new file mode 100644 index 0000000000000000000000000000000000000000..935dab7ac03daf52a0610e6a202b65503856da1c --- /dev/null +++ b/src/tools.hpp @@ -0,0 +1,65 @@ +#ifndef _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP +#define _ASDFDAFVVAFF_PYCUDA_HEADER_SEEN_TOOLS_HPP + + +#include + +#include +#include "numpy_init.hpp" + + + + +namespace pyopencl +{ + inline + npy_intp size_from_dims(int ndim, const npy_intp *dims) + { + if (ndim != 0) + return std::accumulate(dims, dims+ndim, 1, std::multiplies()); + else + return 1; + } + + + + + inline void run_python_gc() + { + namespace py = pybind11; + + py::module::import("gc").attr("collect")(); + } + + + // https://stackoverflow.com/a/28139075 + template + struct reversion_wrapper { T& iterable; }; + + template + auto begin (reversion_wrapper w) { return w.iterable.rbegin(); } + + template + auto end (reversion_wrapper w) { return w.iterable.rend(); } + + template + reversion_wrapper reverse (T&& iterable) { return { iterable }; } + + + // https://stackoverflow.com/a/44175911 + class noncopyable { + public: + noncopyable() = default; + ~noncopyable() = default; + + private: + noncopyable(const noncopyable&) = delete; + noncopyable& operator=(const noncopyable&) = delete; + }; +} + + + + + +#endif diff --git a/src/wrap_cl.cpp b/src/wrap_cl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..29b546e7a07441323cdf6614907b2fe5995f5915 --- /dev/null +++ b/src/wrap_cl.cpp @@ -0,0 +1,24 @@ +#include "wrap_cl.hpp" + + + + +using namespace pyopencl; + + + + +extern void pyopencl_expose_constants(py::module &m); +extern void pyopencl_expose_part_1(py::module &m); +extern void pyopencl_expose_part_2(py::module &m); +extern void pyopencl_expose_mempool(py::module &m); + +PYBIND11_MODULE(_cl, m) +{ + pyopencl_expose_constants(m); + pyopencl_expose_part_1(m); + pyopencl_expose_part_2(m); + pyopencl_expose_mempool(m); +} 
+ +// vim: foldmethod=marker diff --git a/src/wrap_cl.hpp b/src/wrap_cl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ace4bd2556a37541dcb5cfdafdc5c9df963bd31e --- /dev/null +++ b/src/wrap_cl.hpp @@ -0,0 +1,4898 @@ +#ifndef _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP +#define _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP + +// CL 1.2 undecided: +// clSetPrintfCallback + +// {{{ includes + +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +// #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +#ifdef __APPLE__ + +// Mac ------------------------------------------------------------------------ +#include +#include "pyopencl_ext.h" +#ifdef HAVE_GL + +#define PYOPENCL_GL_SHARING_VERSION 1 + +#include +#include +#include +#endif + +#else + +// elsewhere ------------------------------------------------------------------ +#define CL_TARGET_OPENCL_VERSION 220 + +#include +#include "pyopencl_ext.h" + +#if defined(_WIN32) +#define NOMINMAX +#include +#endif + +#ifdef HAVE_GL +#include +#include +#endif + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) +#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing +#endif + +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "wrap_helpers.hpp" +#include "numpy_init.hpp" +#include "tools.hpp" + +#ifdef PYOPENCL_PRETEND_CL_VERSION +#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION +#else + +#if defined(CL_VERSION_2_2) +#define PYOPENCL_CL_VERSION 0x2020 +#elif defined(CL_VERSION_2_1) +#define PYOPENCL_CL_VERSION 0x2010 +#elif defined(CL_VERSION_2_0) +#define PYOPENCL_CL_VERSION 0x2000 +#elif defined(CL_VERSION_1_2) +#define PYOPENCL_CL_VERSION 0x1020 +#elif defined(CL_VERSION_1_1) +#define PYOPENCL_CL_VERSION 0x1010 +#else +#define PYOPENCL_CL_VERSION 0x1000 +#endif + +#endif + + +#if (PY_VERSION_HEX >= 0x03000000) or defined(PYPY_VERSION) +#define PYOPENCL_USE_NEW_BUFFER_INTERFACE +#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) std::move(s) 
+#else +#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) (s) +#endif + + + +// }}} + + + + + +// {{{ tools +#if PY_VERSION_HEX >= 0x02050000 + typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T; +#else + typedef int PYOPENCL_BUFFER_SIZE_T; +#endif + +#define PYOPENCL_CAST_BOOL(B) ((B) ? CL_TRUE : CL_FALSE) + + + + + +#define PYOPENCL_DEPRECATED(WHAT, KILL_VERSION, EXTRA_MSG) \ + { \ + PyErr_Warn( \ + PyExc_DeprecationWarning, \ + WHAT " is deprecated and will stop working in PyOpenCL " KILL_VERSION". " \ + EXTRA_MSG); \ + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + +#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ + NAME##_fn VAR \ + = (NAME##_fn) \ + clGetExtensionFunctionAddressForPlatform(PLATFORM, #NAME); \ + \ + if (!VAR) \ + throw error(#NAME, CL_INVALID_VALUE, #NAME \ + "not available"); + +#else + +#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \ + NAME##_fn VAR \ + = (NAME##_fn) \ + clGetExtensionFunctionAddress(#NAME); \ + \ + if (!VAR) \ + throw error(#NAME, CL_INVALID_VALUE, #NAME \ + "not available"); + +#endif + + +#define PYOPENCL_PARSE_PY_DEVICES \ + std::vector devices_vec; \ + cl_uint num_devices; \ + cl_device_id *devices; \ + \ + if (py_devices.ptr() == Py_None) \ + { \ + num_devices = 0; \ + devices = 0; \ + } \ + else \ + { \ + for (py::handle py_dev: py_devices) \ + devices_vec.push_back( \ + (py_dev).cast().data()); \ + num_devices = devices_vec.size(); \ + devices = devices_vec.empty( ) ? nullptr : &devices_vec.front(); \ + } \ + + +#define PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(OPERATION) \ + try \ + { \ + OPERATION \ + } \ + catch (pyopencl::error &e) \ + { \ + if (!e.is_out_of_memory()) \ + throw; \ + } \ + \ + /* If we get here, we got an error from CL. + * We should run the Python GC to try and free up + * some memory references. */ \ + run_python_gc(); \ + \ + /* Now retry the allocation. If it fails again, + * let it fail. 
*/ \ + { \ + OPERATION \ + } + + + + +#define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \ + { \ + bool failed_with_mem_error = false; \ + try \ + { \ + OPERATION \ + } \ + catch (pyopencl::error &e) \ + { \ + failed_with_mem_error = true; \ + if (!e.is_out_of_memory()) \ + throw; \ + } \ + \ + if (failed_with_mem_error) \ + { \ + /* If we get here, we got an error from CL. + * We should run the Python GC to try and free up + * some memory references. */ \ + run_python_gc(); \ + \ + /* Now retry the allocation. If it fails again, + * let it fail. */ \ + { \ + OPERATION \ + } \ + } \ + } + +// }}} + +// {{{ tracing and error reporting +#ifdef PYOPENCL_TRACE + #define PYOPENCL_PRINT_CALL_TRACE(NAME) \ + std::cerr << NAME << std::endl; + #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \ + std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl; +#else + #define PYOPENCL_PRINT_CALL_TRACE(NAME) /*nothing*/ + #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/ +#endif + +#define PYOPENCL_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ + { \ + PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ + cl_int status_code; \ + { \ + py::gil_scoped_release release; \ + status_code = NAME ARGLIST; \ + } \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \ + { \ + PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED_THREADED(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + { \ + py::gil_scoped_release release; \ + status_code = NAME ARGLIST; \ + } \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } + +#define PYOPENCL_CALL_GUARDED(NAME, ARGLIST) \ + { \ + 
PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#NAME, status_code);\ + } +#define PYOPENCL_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \ + { \ + PYOPENCL_PRINT_CALL_TRACE(#NAME); \ + cl_int status_code; \ + status_code = NAME ARGLIST; \ + if (status_code != CL_SUCCESS) \ + std::cerr \ + << "PyOpenCL WARNING: a clean-up operation failed (dead context maybe?)" \ + << std::endl \ + << #NAME " failed with code " << status_code \ + << std::endl; \ + } + +// }}} + +// {{{ get_info helpers +#define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \ + { \ + CL_TYPE param_value; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ + if (param_value) \ + return py::object(handle_from_new_ptr( \ + new TYPE(param_value, /*retain*/ true))); \ + else \ + return py::none(); \ + } + +#define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \ + { \ + size_t size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, &size)); \ + \ + RES_VEC.resize(size / sizeof(RES_VEC.front())); \ + \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, size, \ + RES_VEC.empty( ) ? nullptr : &RES_VEC.front(), &size)); \ + } + +#define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \ + { \ + size_t param_value_size; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, 0, 0, ¶m_value_size)); \ + \ + std::vector param_value(param_value_size); \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, param_value_size, \ + param_value.empty( ) ? nullptr : ¶m_value.front(), ¶m_value_size)); \ + \ + return py::cast( \ + param_value.empty( ) ? 
"" : std::string(¶m_value.front(), param_value_size-1)); \ + } + + + + +#define PYOPENCL_GET_INTEGRAL_INFO(WHAT, FIRST_ARG, SECOND_ARG, TYPE) \ + { \ + TYPE param_value; \ + PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \ + (FIRST_ARG, SECOND_ARG, sizeof(param_value), ¶m_value, 0)); \ + return py::cast(param_value); \ + } + +// }}} + +// {{{ event helpers -------------------------------------------------------------- +#define PYOPENCL_PARSE_WAIT_FOR \ + cl_uint num_events_in_wait_list = 0; \ + std::vector event_wait_list; \ + \ + if (py_wait_for.ptr() != Py_None) \ + { \ + event_wait_list.resize(len(py_wait_for)); \ + for (py::handle evt: py_wait_for) \ + event_wait_list[num_events_in_wait_list++] = \ + evt.cast().data(); \ + } + +#define PYOPENCL_WAITLIST_ARGS \ + num_events_in_wait_list, event_wait_list.empty( ) ? nullptr : &event_wait_list.front() + +#define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \ + try \ + { \ + return new nanny_event(evt, false, obj); \ + } \ + catch (...) \ + { \ + clReleaseEvent(evt); \ + throw; \ + } + +#define PYOPENCL_RETURN_NEW_EVENT(evt) \ + try \ + { \ + return new event(evt, false); \ + } \ + catch (...) 
\ + { \ + clReleaseEvent(evt); \ + throw; \ + } + +// }}} + +// {{{ equality testing +#define PYOPENCL_EQUALITY_TESTS(cls) \ + bool operator==(cls const &other) const \ + { return data() == other.data(); } \ + bool operator!=(cls const &other) const \ + { return data() != other.data(); } \ + long hash() const \ + { return (long) (intptr_t) data(); } +// }}} + + + +namespace pyopencl +{ + // {{{ error + class error : public std::runtime_error + { + private: + std::string m_routine; + cl_int m_code; + + public: + error(const char *routine, cl_int c, const char *msg="") + : std::runtime_error(msg), m_routine(routine), m_code(c) + { } + + const std::string &routine() const + { + return m_routine; + } + + cl_int code() const + { + return m_code; + } + + bool is_out_of_memory() const + { + return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE + || code() == CL_OUT_OF_RESOURCES + || code() == CL_OUT_OF_HOST_MEMORY); + } + + }; + + // }}} + + + // {{{ buffer interface helper + // +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + class py_buffer_wrapper : public noncopyable + { + private: + bool m_initialized; + + public: + Py_buffer m_buf; + + py_buffer_wrapper() + : m_initialized(false) + {} + + void get(PyObject *obj, int flags) + { +#ifdef PYPY_VERSION + // work around https://bitbucket.org/pypy/pypy/issues/2873 + if (flags & PyBUF_ANY_CONTIGUOUS) + { + int flags_wo_cont = flags & ~PyBUF_ANY_CONTIGUOUS; + if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_C_CONTIGUOUS)) + { + PyErr_Clear(); + if (PyObject_GetBuffer(obj, &m_buf, flags_wo_cont | PyBUF_F_CONTIGUOUS)) + throw py::error_already_set(); + } + } + else +#endif + if (PyObject_GetBuffer(obj, &m_buf, flags)) + throw py::error_already_set(); + + m_initialized = true; + } + + virtual ~py_buffer_wrapper() + { + if (m_initialized) + PyBuffer_Release(&m_buf); + } + }; +#endif + + // }}} + + inline + py::tuple get_cl_header_version() + { + return py::make_tuple( + PYOPENCL_CL_VERSION >> (3*4), + (PYOPENCL_CL_VERSION >> 
(1*4)) & 0xff + ); + } + + + // {{{ platform + + class platform : noncopyable + { + private: + cl_platform_id m_platform; + + public: + platform(cl_platform_id pid) + : m_platform(pid) + { } + + platform(cl_platform_id pid, bool /*retain (ignored)*/) + : m_platform(pid) + { } + + cl_platform_id data() const + { + return m_platform; + } + + PYOPENCL_EQUALITY_TESTS(platform); + + py::object get_info(cl_platform_info param_name) const + { + switch (param_name) + { + case CL_PLATFORM_PROFILE: + case CL_PLATFORM_VERSION: + case CL_PLATFORM_NAME: + case CL_PLATFORM_VENDOR: +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + case CL_PLATFORM_EXTENSIONS: +#endif + PYOPENCL_GET_STR_INFO(Platform, m_platform, param_name); + + default: + throw error("Platform.get_info", CL_INVALID_VALUE); + } + } + + py::list get_devices(cl_device_type devtype); + }; + + + + + inline + py::list get_platforms() + { + cl_uint num_platforms = 0; + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, (0, 0, &num_platforms)); + + std::vector platforms(num_platforms); + PYOPENCL_CALL_GUARDED(clGetPlatformIDs, + (num_platforms, platforms.empty( ) ? 
nullptr : &platforms.front(), &num_platforms)); + + py::list result; + for (cl_platform_id pid: platforms) + result.append(handle_from_new_ptr( + new platform(pid))); + + return result; + } + + // }}} + + + // {{{ device + + class device : noncopyable + { + public: + enum reference_type_t { + REF_NOT_OWNABLE, +#if PYOPENCL_CL_VERSION >= 0x1020 + REF_CL_1_2, +#endif + }; + private: + cl_device_id m_device; + reference_type_t m_ref_type; + + public: + device(cl_device_id did) + : m_device(did), m_ref_type(REF_NOT_OWNABLE) + { } + + device(cl_device_id did, bool retain, reference_type_t ref_type=REF_NOT_OWNABLE) + : m_device(did), m_ref_type(ref_type) + { + if (retain && ref_type != REF_NOT_OWNABLE) + { + if (false) + { } + +#if PYOPENCL_CL_VERSION >= 0x1020 + else if (ref_type == REF_CL_1_2) + { + PYOPENCL_CALL_GUARDED(clRetainDevice, (did)); + } +#endif + + else + throw error("Device", CL_INVALID_VALUE, + "cannot own references to devices when device fission or CL 1.2 is not available"); + } + } + + ~device() + { +#if PYOPENCL_CL_VERSION >= 0x1020 + if (m_ref_type == REF_CL_1_2) + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseDevice, (m_device)); +#endif + } + + cl_device_id data() const + { + return m_device; + } + + PYOPENCL_EQUALITY_TESTS(device); + + py::object get_info(cl_device_info param_name) const + { +#define DEV_GET_INT_INF(TYPE) \ + PYOPENCL_GET_INTEGRAL_INFO(Device, m_device, param_name, TYPE); + + switch (param_name) + { + case CL_DEVICE_TYPE: DEV_GET_INT_INF(cl_device_type); + case CL_DEVICE_VENDOR_ID: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_COMPUTE_UNITS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WORK_GROUP_SIZE: DEV_GET_INT_INF(size_t); + + case CL_DEVICE_MAX_WORK_ITEM_SIZES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } + + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: 
DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_MAX_CLOCK_FREQUENCY: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_ADDRESS_BITS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_READ_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_MEM_ALLOC_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_IMAGE2D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE2D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_WIDTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_HEIGHT: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE3D_MAX_DEPTH: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE_SUPPORT: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_MAX_PARAMETER_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_MAX_SAMPLERS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MEM_BASE_ADDR_ALIGN: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SINGLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#ifdef CL_DEVICE_DOUBLE_FP_CONFIG + case CL_DEVICE_DOUBLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#endif +#ifdef CL_DEVICE_HALF_FP_CONFIG + case CL_DEVICE_HALF_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config); +#endif + + case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: DEV_GET_INT_INF(cl_device_mem_cache_type); + case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_GLOBAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + + case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: DEV_GET_INT_INF(cl_ulong); + case 
CL_DEVICE_MAX_CONSTANT_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_LOCAL_MEM_TYPE: DEV_GET_INT_INF(cl_device_local_mem_type); + case CL_DEVICE_LOCAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong); + case CL_DEVICE_ERROR_CORRECTION_SUPPORT: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_PROFILING_TIMER_RESOLUTION: DEV_GET_INT_INF(size_t); + case CL_DEVICE_ENDIAN_LITTLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities); +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); +#else + case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); +#endif + + case CL_DEVICE_NAME: + case CL_DEVICE_VENDOR: + case CL_DRIVER_VERSION: + case CL_DEVICE_PROFILE: + case CL_DEVICE_VERSION: + case CL_DEVICE_EXTENSIONS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + + case CL_DEVICE_PLATFORM: + PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_platform_id, platform); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint); + + case CL_DEVICE_HOST_UNIFIED_MEMORY: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_OPENCL_C_VERSION: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV: + case 
CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV: + case CL_DEVICE_REGISTERS_PER_BLOCK_NV: + case CL_DEVICE_WARP_SIZE_NV: + DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_GPU_OVERLAP_NV: + case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: + case CL_DEVICE_INTEGRATED_MEMORY_NV: + DEV_GET_INT_INF(cl_bool); +#endif +#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV + case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PCI_BUS_ID_NV + case CL_DEVICE_PCI_BUS_ID_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PCI_SLOT_ID_NV + case CL_DEVICE_PCI_SLOT_ID_NV: + DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD + case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: DEV_GET_INT_INF(cl_bool); +#endif +#ifdef CL_DEVICE_GFXIP_MAJOR_AMD + case CL_DEVICE_GFXIP_MAJOR_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GFXIP_MINOR_AMD + case CL_DEVICE_GFXIP_MINOR_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD + case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_BUILT_IN_KERNELS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_PARENT_DEVICE: + PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device); + case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PARTITION_TYPE: + case CL_DEVICE_PARTITION_PROPERTIES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_device_partition_property, result); + } + case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: + { +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +// what's being ignored here is an alignment attribute to native size, 
which +// shouldn't matter on the relevant ABIs that I'm aware of. +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + std::vector result; +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result); + } + case CL_DEVICE_REFERENCE_COUNT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool); + case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool); +#endif +// {{{ AMD dev attrs cl_amd_device_attribute_query +// +// types of AMD dev attrs divined from +// https://www.khronos.org/registry/cl/api/1.2/cl.hpp +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD + case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: DEV_GET_INT_INF(cl_ulong); +#endif +/* FIXME +#ifdef CL_DEVICE_TOPOLOGY_AMD + case CL_DEVICE_TOPOLOGY_AMD: +#endif +*/ +#ifdef CL_DEVICE_BOARD_NAME_AMD + case CL_DEVICE_BOARD_NAME_AMD: ; + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD + case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD + case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD + case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD + case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD + case 
CL_DEVICE_LOCAL_MEM_BANKS_AMD: DEV_GET_INT_INF(cl_uint); +#endif +// }}} + +#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT + case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties); + case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_ON_DEVICE_QUEUES: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_MAX_ON_DEVICE_EVENTS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SVM_CAPABILITIES: DEV_GET_INT_INF(cl_device_svm_capabilities); + case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: DEV_GET_INT_INF(size_t); + case CL_DEVICE_MAX_PIPE_ARGS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PIPE_MAX_PACKET_SIZE: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint); +#endif +#if PYOPENCL_CL_VERSION >= 0x2010 + case CL_DEVICE_IL_VERSION: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); + case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(cl_uint); + case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(cl_bool); +#endif +#ifdef CL_DEVICE_ME_VERSION_INTEL + case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM + case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_PAGE_SIZE_QCOM + case CL_DEVICE_PAGE_SIZE_QCOM: DEV_GET_INT_INF(cl_uint); +#endif +#ifdef CL_DEVICE_SPIR_VERSIONS + case 
CL_DEVICE_SPIR_VERSIONS: + PYOPENCL_GET_STR_INFO(Device, m_device, param_name); +#endif +#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA + case CL_DEVICE_CORE_TEMPERATURE_ALTERA: DEV_GET_INT_INF(cl_int); +#endif + +#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result); + PYOPENCL_RETURN_VECTOR(cl_uint, result); + } +#endif +#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL + case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: DEV_GET_INT_INF(cl_uint); +#endif + + default: + throw error("Device.get_info", CL_INVALID_VALUE); + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::list create_sub_devices(py::object py_properties) + { + std::vector properties; + + COPY_PY_LIST(cl_device_partition_property, properties); + properties.push_back(0); + + cl_device_partition_property *props_ptr + = properties.empty( ) ? nullptr : &properties.front(); + + cl_uint num_entries; + PYOPENCL_CALL_GUARDED(clCreateSubDevices, + (m_device, props_ptr, 0, nullptr, &num_entries)); + + std::vector result; + result.resize(num_entries); + + PYOPENCL_CALL_GUARDED(clCreateSubDevices, + (m_device, props_ptr, num_entries, &result.front(), nullptr)); + + py::list py_result; + for (cl_device_id did: result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did, /*retain*/true, + device::REF_CL_1_2))); + return py_result; + } +#endif + + }; + + + + + inline py::list platform::get_devices(cl_device_type devtype) + { + cl_uint num_devices = 0; + PYOPENCL_PRINT_CALL_TRACE("clGetDeviceIDs"); + { + cl_int status_code; + status_code = clGetDeviceIDs(m_platform, devtype, 0, 0, &num_devices); + if (status_code == CL_DEVICE_NOT_FOUND) + num_devices = 0; + else if (status_code != CL_SUCCESS) \ + throw pyopencl::error("clGetDeviceIDs", status_code); + } + + if (num_devices == 0) + return py::list(); + + std::vector devices(num_devices); + PYOPENCL_CALL_GUARDED(clGetDeviceIDs, + 
(m_platform, devtype, + num_devices, devices.empty( ) ? nullptr : &devices.front(), &num_devices)); + + py::list result; + for (cl_device_id did: devices) + result.append(handle_from_new_ptr( + new device(did))); + + return result; + } + + // }}} + + + // {{{ context + + class context : public noncopyable + { + private: + cl_context m_context; + + public: + context(cl_context ctx, bool retain) + : m_context(ctx) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainContext, (ctx)); + } + + ~context() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext, + (m_context)); + } + + cl_context data() const + { + return m_context; + } + + PYOPENCL_EQUALITY_TESTS(context); + + py::object get_info(cl_context_info param_name) const + { + switch (param_name) + { + case CL_CONTEXT_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO( + Context, m_context, param_name, cl_uint); + + case CL_CONTEXT_DEVICES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + py::list py_result; + for (cl_device_id did: result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did))); + return py_result; + } + + case CL_CONTEXT_PROPERTIES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result); + + py::list py_result; + for (size_t i = 0; i < result.size(); i+=2) + { + cl_context_properties key = result[i]; + py::object value; + switch (key) + { + case CL_CONTEXT_PLATFORM: + { + value = py::object( + handle_from_new_ptr(new platform( + reinterpret_cast(result[i+1])))); + break; + } + +#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) +#if defined(__APPLE__) && defined(HAVE_GL) + case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: +#else + case CL_GL_CONTEXT_KHR: + case CL_EGL_DISPLAY_KHR: + case CL_GLX_DISPLAY_KHR: + case CL_WGL_HDC_KHR: + case CL_CGL_SHAREGROUP_KHR: +#endif + value = py::cast(result[i+1]); + break; + +#endif + case 0: + break; + + default: + throw error("Context.get_info", 
CL_INVALID_VALUE, + "unknown context_property key encountered"); + } + + py_result.append(py::make_tuple(result[i], value)); + } + return py_result; + } + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_CONTEXT_NUM_DEVICES: + PYOPENCL_GET_INTEGRAL_INFO( + Context, m_context, param_name, cl_uint); +#endif + + default: + throw error("Context.get_info", CL_INVALID_VALUE); + } + } + + + // not exposed to python + int get_hex_platform_version() const + { + std::vector devices; + PYOPENCL_GET_VEC_INFO(Context, m_context, CL_CONTEXT_DEVICES, devices); + + if (devices.size() == 0) + throw error("Context._get_hex_version", CL_INVALID_VALUE, + "platform has no devices"); + + cl_platform_id plat; + + PYOPENCL_CALL_GUARDED(clGetDeviceInfo, + (devices[0], CL_DEVICE_PLATFORM, sizeof(plat), &plat, nullptr)); + + std::string plat_version; + { + size_t param_value_size; + PYOPENCL_CALL_GUARDED(clGetPlatformInfo, + (plat, CL_PLATFORM_VERSION, 0, 0, ¶m_value_size)); + + std::vector param_value(param_value_size); + PYOPENCL_CALL_GUARDED(clGetPlatformInfo, + (plat, CL_PLATFORM_VERSION, param_value_size, + param_value.empty( ) ? nullptr : ¶m_value.front(), ¶m_value_size)); + + plat_version = + param_value.empty( ) ? 
"" : std::string(¶m_value.front(), param_value_size-1); + } + + int major_ver, minor_ver; + errno = 0; + int match_count = sscanf(plat_version.c_str(), "OpenCL %d.%d ", &major_ver, &minor_ver); + if (errno || match_count != 2) + throw error("Context._get_hex_version", CL_INVALID_VALUE, + "Platform version string did not have expected format"); + + return major_ver << 12 | minor_ver << 4; + } + }; + + + inline + std::vector parse_context_properties( + py::object py_properties) + { + std::vector props; + + if (py_properties.ptr() != Py_None) + { + for (py::handle prop_tuple_py: py_properties) + { + py::tuple prop_tuple(prop_tuple_py.cast()); + + if (len(prop_tuple) != 2) + throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2"); + cl_context_properties prop = prop_tuple[0].cast(); + props.push_back(prop); + + if (prop == CL_CONTEXT_PLATFORM) + { + props.push_back( + reinterpret_cast( + prop_tuple[1].cast().data())); + } +#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) +#if defined(_WIN32) + else if (prop == CL_WGL_HDC_KHR) + { + // size_t is a stand-in for HANDLE, hopefully has the same size. 
+ size_t hnd = (prop_tuple[1]).cast(); + props.push_back(hnd); + } +#endif + else if ( +#if defined(__APPLE__) && defined(HAVE_GL) + prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE +#else + prop == CL_GL_CONTEXT_KHR + || prop == CL_EGL_DISPLAY_KHR + || prop == CL_GLX_DISPLAY_KHR + || prop == CL_CGL_SHAREGROUP_KHR +#endif + ) + { + py::object ctypes = py::module::import("ctypes"); + py::object prop = prop_tuple[1], c_void_p = ctypes.attr("c_void_p"); + py::object ptr = ctypes.attr("cast")(prop, c_void_p); + props.push_back(ptr.attr("value").cast()); + } +#endif + else + throw error("Context", CL_INVALID_VALUE, "invalid context property"); + } + props.push_back(0); + } + + return props; + } + + + inline + context *create_context_inner(py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + std::vector props + = parse_context_properties(py_properties); + + cl_context_properties *props_ptr + = props.empty( ) ? nullptr : &props.front(); + + cl_int status_code; + + cl_context ctx; + + // from device list + if (py_devices.ptr() != Py_None) + { + if (py_dev_type.ptr() != Py_None) + throw error("Context", CL_INVALID_VALUE, + "one of 'devices' or 'dev_type' must be None"); + + std::vector devices; + for (py::handle py_dev: py_devices) + devices.push_back(py_dev.cast().data()); + + PYOPENCL_PRINT_CALL_TRACE("clCreateContext"); + ctx = clCreateContext( + props_ptr, + devices.size(), + devices.empty( ) ? nullptr : &devices.front(), + 0, 0, &status_code); + } + // from dev_type + else + { + cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; + if (py_dev_type.ptr() != Py_None) + dev_type = py_dev_type.cast(); + + PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType"); + ctx = clCreateContextFromType(props_ptr, dev_type, 0, 0, &status_code); + } + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Context", status_code); + + try + { + return new context(ctx, false); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseContext, (ctx)); + throw; + } + } + + + inline + context *create_context(py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_context_inner(py_devices, py_properties, py_dev_type); + ) + } + + // }}} + + + // {{{ command_queue + + class command_queue + { + private: + cl_command_queue m_queue; + + public: + command_queue(cl_command_queue q, bool retain) + : m_queue(q) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q)); + } + + command_queue(command_queue const &src) + : m_queue(src.m_queue) + { + PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); + } + + command_queue( + const context &ctx, + const device *py_dev=nullptr, + py::object py_props=py::none()) + { + cl_device_id dev; + if (py_dev) + dev = py_dev->data(); + else + { + std::vector devs; + PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs); + if (devs.size() == 0) + throw pyopencl::error("CommandQueue", CL_INVALID_VALUE, + "context doesn't have any devices? 
-- don't know which one to default to"); + dev = devs[0]; + } + + int hex_plat_version = ctx.get_hex_platform_version(); + + bool props_given_as_numeric; + cl_command_queue_properties num_props; + if (py_props.is_none()) + { + num_props = 0; + props_given_as_numeric = true; + } + else + { + try + { + num_props = py::cast(py_props); + props_given_as_numeric = true; + } + catch (py::cast_error &) + { + props_given_as_numeric = false; + } + } + + if (props_given_as_numeric) + { +#if PYOPENCL_CL_VERSION >= 0x2000 + if (hex_plat_version >= 0x2000) + { + cl_queue_properties props_list[] = { CL_QUEUE_PROPERTIES, num_props, 0 }; + + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); + m_queue = clCreateCommandQueueWithProperties( + ctx.data(), dev, props_list, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } + else +#endif + { + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue"); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif + m_queue = clCreateCommandQueue( + ctx.data(), dev, num_props, &status_code); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); + } + } + else + { +#if PYOPENCL_CL_VERSION >= 0x2000 + throw error("CommandQueue", CL_INVALID_VALUE, + "queue properties given as an iterable, " + "which is only allowed when PyOpenCL was built " + "against an OpenCL 2+ header"); + + if (hex_plat_version < 0x2000) + { + std::cerr << + "queue properties given as an iterable, " + "which uses an OpenCL 2+-only interface, " + "but the context's platform does not " + "declare OpenCL 2 support. Proceeding " + "as requested, but the next thing you see " + "may be a crash." 
<< std:: endl; + } + + cl_queue_properties props[py::len(py_props) + 1]; + { + size_t i = 0; + for (auto prop: py_props) + props[i++] = py::cast(prop); + props[i++] = 0; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties"); + m_queue = clCreateCommandQueueWithProperties( + ctx.data(), dev, props, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("CommandQueue", status_code); +#endif + } + } + + ~command_queue() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, + (m_queue)); + } + + const cl_command_queue data() const + { return m_queue; } + + PYOPENCL_EQUALITY_TESTS(command_queue); + + py::object get_info(cl_command_queue_info param_name) const + { + switch (param_name) + { + case CL_QUEUE_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + cl_context, context); + case CL_QUEUE_DEVICE: + PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name, + cl_device_id, device); + case CL_QUEUE_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + cl_uint); + case CL_QUEUE_PROPERTIES: + PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name, + cl_command_queue_properties); + + default: + throw error("CommandQueue.get_info", CL_INVALID_VALUE); + } + } + + std::unique_ptr get_context() const + { + cl_context param_value; + PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, + (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), ¶m_value, 0)); + return std::unique_ptr( + new context(param_value, /*retain*/ true)); + } + +#if PYOPENCL_CL_VERSION < 0x1010 + cl_command_queue_properties set_property( + cl_command_queue_properties prop, + bool enable) + { + cl_command_queue_properties old_prop; + PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty, + (m_queue, prop, PYOPENCL_CAST_BOOL(enable), &old_prop)); + return old_prop; + } +#endif + + void flush() + { PYOPENCL_CALL_GUARDED(clFlush, (m_queue)); } + void finish() + { PYOPENCL_CALL_GUARDED_THREADED(clFinish, 
(m_queue)); } + }; + + // }}} + + + // {{{ event/synchronization + + class event : noncopyable + { + private: + cl_event m_event; + + public: + event(cl_event event, bool retain) + : m_event(event) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainEvent, (event)); + } + + event(event const &src) + : m_event(src.m_event) + { PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event)); } + + virtual ~event() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent, + (m_event)); + } + + const cl_event data() const + { return m_event; } + + PYOPENCL_EQUALITY_TESTS(event); + + py::object get_info(cl_event_info param_name) const + { + switch (param_name) + { + case CL_EVENT_COMMAND_QUEUE: + PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, + cl_command_queue, command_queue); + case CL_EVENT_COMMAND_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_command_type); + case CL_EVENT_COMMAND_EXECUTION_STATUS: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_int); + case CL_EVENT_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name, + cl_uint); +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_EVENT_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name, + cl_context, context); +#endif + + default: + throw error("Event.get_info", CL_INVALID_VALUE); + } + } + + py::object get_profiling_info(cl_profiling_info param_name) const + { + switch (param_name) + { + case CL_PROFILING_COMMAND_QUEUED: + case CL_PROFILING_COMMAND_SUBMIT: + case CL_PROFILING_COMMAND_START: + case CL_PROFILING_COMMAND_END: +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_PROFILING_COMMAND_COMPLETE: +#endif + PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name, + cl_ulong); + default: + throw error("Event.get_profiling_info", CL_INVALID_VALUE); + } + } + + virtual void wait() + { + PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); + } + +#if PYOPENCL_CL_VERSION >= 0x1010 + // {{{ set_callback, by way of a a thread-based construction + + private: + 
struct event_callback_info_t + { + std::mutex m_mutex; + std::condition_variable m_condvar; + + py::object m_py_event; + py::object m_py_callback; + + bool m_set_callback_suceeded; + + bool m_notify_thread_wakeup_is_genuine; + + cl_event m_event; + cl_int m_command_exec_status; + + event_callback_info_t(py::object py_event, py::object py_callback) + : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true), + m_notify_thread_wakeup_is_genuine(false) + {} + }; + + static void evt_callback(cl_event evt, cl_int command_exec_status, void *user_data) + { + event_callback_info_t *cb_info = reinterpret_cast(user_data); + { + std::lock_guard lg(cb_info->m_mutex); + cb_info->m_event = evt; + cb_info->m_command_exec_status = command_exec_status; + cb_info->m_notify_thread_wakeup_is_genuine = true; + } + + cb_info->m_condvar.notify_one(); + } + + public: + void set_callback(cl_int command_exec_callback_type, py::object pfn_event_notify) + { + // The reason for doing this via a thread is that we're able to wait on + // acquiring the GIL. 
(which we can't in the callback) + + std::unique_ptr cb_info_holder( + new event_callback_info_t( + handle_from_new_ptr(new event(*this)), + pfn_event_notify)); + event_callback_info_t *cb_info = cb_info_holder.get(); + + std::thread notif_thread([cb_info]() + { + { + std::unique_lock ulk(cb_info->m_mutex); + cb_info->m_condvar.wait( + ulk, + [&](){ return cb_info->m_notify_thread_wakeup_is_genuine; }); + + // ulk no longer held here, cb_info ready for deletion + } + + { + py::gil_scoped_acquire acquire; + + if (cb_info->m_set_callback_suceeded) + { + try { + cb_info->m_py_callback( + // cb_info->m_py_event, + cb_info->m_command_exec_status); + } + catch (std::exception &exc) + { + std::cerr + << "[pyopencl] event callback handler threw an exception, ignoring: " + << exc.what() + << std::endl; + } + } + + // Need to hold GIL to delete py::object instances in + // event_callback_info_t + delete cb_info; + } + }); + // Thread is away--it is now its responsibility to free cb_info. + cb_info_holder.release(); + + // notif_thread should no longer be coupled to the lifetime of the thread. + notif_thread.detach(); + + try + { + PYOPENCL_CALL_GUARDED(clSetEventCallback, ( + data(), command_exec_callback_type, &event::evt_callback, cb_info)); + } + catch (...) { + // Setting the callback did not succeed. The thread would never + // be woken up. Wake it up to let it know that it can stop. + { + std::lock_guard lg(cb_info->m_mutex); + cb_info->m_set_callback_suceeded = false; + cb_info->m_notify_thread_wakeup_is_genuine = true; + } + cb_info->m_condvar.notify_one(); + throw; + } + } + // }}} +#endif + }; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + class nanny_event : public event + { + // In addition to everything an event does, the nanny event holds a reference + // to a Python object and waits for its own completion upon destruction. 
+ + protected: + std::unique_ptr m_ward; + + public: + + nanny_event(cl_event evt, bool retain, std::unique_ptr &ward) + : event(evt, retain), m_ward(std::move(ward)) + { } + + ~nanny_event() + { wait(); } + + py::object get_ward() const + { + if (m_ward.get()) + { + return py::reinterpret_borrow(m_ward->m_buf.obj); + } + else + return py::none(); + } + + virtual void wait() + { + event::wait(); + m_ward.reset(); + } + }; +#else + class nanny_event : public event + { + // In addition to everything an event does, the nanny event holds a reference + // to a Python object and waits for its own completion upon destruction. + + protected: + py::object m_ward; + + public: + + nanny_event(cl_event evt, bool retain, py::object ward) + : event(evt, retain), m_ward(ward) + { } + + nanny_event(nanny_event const &src) + : event(src), m_ward(src.m_ward) + { } + + ~nanny_event() + { wait(); } + + py::object get_ward() const + { return m_ward; } + + virtual void wait() + { + event::wait(); + m_ward = py::none(); + } + }; +#endif + + + + + inline + void wait_for_events(py::object events) + { + cl_uint num_events_in_wait_list = 0; + std::vector event_wait_list(len(events)); + + for (py::handle evt: events) + event_wait_list[num_events_in_wait_list++] = + evt.cast().data(); + + PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, ( + PYOPENCL_WAITLIST_ARGS)); + } + + + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_marker_with_wait_list(command_queue &cq, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, ( + cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + inline + event *enqueue_barrier_with_wait_list(command_queue &cq, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueBarrierWithWaitList, + (cq.data(), PYOPENCL_WAITLIST_ARGS, &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + + // {{{ used 
internally for pre-OpenCL-1.2 contexts + + inline + event *enqueue_marker(command_queue &cq) + { + cl_event evt; + + PYOPENCL_CALL_GUARDED(clEnqueueMarker, ( + cq.data(), &evt)); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + inline + void enqueue_wait_for_events(command_queue &cq, py::object py_events) + { + cl_uint num_events = 0; + std::vector event_list(len(py_events)); + + for (py::handle py_evt: py_events) + event_list[num_events++] = py_evt.cast().data(); + + PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, ( + cq.data(), num_events, event_list.empty( ) ? nullptr : &event_list.front())); + } + + inline + void enqueue_barrier(command_queue &cq) + { + PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data())); + } + + // }}} + + +#if PYOPENCL_CL_VERSION >= 0x1010 + class user_event : public event + { + public: + user_event(cl_event evt, bool retain) + : event(evt, retain) + { } + + void set_status(cl_int execution_status) + { + PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status)); + } + }; + + + + + inline + user_event *create_user_event(context &ctx) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent"); + cl_event evt = clCreateUserEvent(ctx.data(), &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("UserEvent", status_code); + + try + { + return new user_event(evt, false); + } + catch (...) 
+ { + clReleaseEvent(evt); + throw; + } + } + +#endif + + // }}} + + + // {{{ memory_object + + py::object create_mem_object_wrapper(cl_mem mem, bool retain); + + class memory_object_holder + { + public: + virtual const cl_mem data() const = 0; + + PYOPENCL_EQUALITY_TESTS(memory_object_holder); + + size_t size() const + { + size_t param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(param_value), ¶m_value, 0)); + return param_value; + } + + py::object get_info(cl_mem_info param_name) const; + }; + + + + + class memory_object : noncopyable, public memory_object_holder + { + public: +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + typedef std::unique_ptr hostbuf_t; +#else + typedef py::object hostbuf_t; +#endif + + private: + bool m_valid; + cl_mem m_mem; + hostbuf_t m_hostbuf; + + public: + memory_object(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : m_valid(true), m_mem(mem) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem)); + + m_hostbuf = PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf); + } + + memory_object(memory_object &src) + : m_valid(true), m_mem(src.m_mem), + m_hostbuf(PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(src.m_hostbuf)) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + memory_object(memory_object_holder const &src) + : m_valid(true), m_mem(src.data()) + { + PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem)); + } + + void release() + { + if (!m_valid) + throw error("MemoryObject.free", CL_INVALID_VALUE, + "trying to double-unref mem object"); + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem)); + m_valid = false; + } + + virtual ~memory_object() + { + if (m_valid) + release(); + } + + py::object hostbuf() + { +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (m_hostbuf.get()) + return py::reinterpret_borrow(m_hostbuf->m_buf.obj); + else + return py::none(); +#else + return m_hostbuf; +#endif + } + + const cl_mem data() const + { return m_mem; } + + }; + +#if PYOPENCL_CL_VERSION >= 
0x1020 + inline + event *enqueue_migrate_mem_objects( + command_queue &cq, + py::object py_mem_objects, + cl_mem_migration_flags flags, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + std::vector mem_objects; + for (py::handle mo: py_mem_objects) + mem_objects.push_back(mo.cast().data()); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, ( + cq.data(), + mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), + flags, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + + // {{{ buffer + + inline cl_mem create_buffer( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer"); + cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("create_buffer", status_code); + + return mem; + } + + + + + inline cl_mem create_buffer_gc( + cl_context ctx, + cl_mem_flags flags, + size_t size, + void *host_ptr) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_buffer(ctx, flags, size, host_ptr); + ); + } + + + +#if PYOPENCL_CL_VERSION >= 0x1010 + inline cl_mem create_sub_buffer( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer"); + cl_mem mem = clCreateSubBuffer(buffer, flags, + bct, buffer_create_info, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateSubBuffer", status_code); + + return mem; + } + + + + + inline cl_mem create_sub_buffer_gc( + cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct, + const void *buffer_create_info) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_sub_buffer(buffer, flags, bct, buffer_create_info); + ); + } +#endif + + + + class buffer : public memory_object + { + public: + 
buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) + { } + +#if PYOPENCL_CL_VERSION >= 0x1010 + buffer *get_sub_region( + size_t origin, size_t size, cl_mem_flags flags) const + { + cl_buffer_region region = { origin, size}; + + cl_mem mem = create_sub_buffer_gc( + data(), flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion); + + try + { + return new buffer(mem, false); + } + catch (...) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + buffer *getitem(py::slice slc) const + { + PYOPENCL_BUFFER_SIZE_T start, end, stride, length; + + size_t my_length; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0)); + +#if PY_VERSION_HEX >= 0x03020000 + if (PySlice_GetIndicesEx(slc.ptr(), +#else + if (PySlice_GetIndicesEx(reinterpret_cast(slc.ptr()), +#endif + my_length, &start, &end, &stride, &length) != 0) + throw py::error_already_set(); + + if (stride != 1) + throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, + "Buffer slice must have stride 1"); + + cl_mem_flags my_flags; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0)); + + my_flags &= ~CL_MEM_COPY_HOST_PTR; + + if (end <= start) + throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE, + "Buffer slice have end > start"); + + return get_sub_region(start, end-start, my_flags); + } +#endif + }; + + // {{{ buffer creation + + inline + buffer *create_buffer_py( + context &ctx, + cl_mem_flags flags, + size_t size, + py::object py_hostbuf + ) + { + if (py_hostbuf.ptr() != Py_None && + !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " + "but no memory flags to make use of it."); + + void *buf = 0; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr retained_buf_obj; + if (py_hostbuf.ptr() != Py_None) + { + retained_buf_obj = 
std::unique_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(py_hostbuf.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + + if (size > size_t(retained_buf_obj->m_buf.len)) + throw pyopencl::error("Buffer", CL_INVALID_VALUE, + "specified size is greater than host buffer size"); + if (size == 0) + size = retained_buf_obj->m_buf.len; + } +#else + py::object retained_buf_obj; + if (py_hostbuf.ptr() != Py_None) + { + PYOPENCL_BUFFER_SIZE_T len; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(py_hostbuf.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + py_hostbuf.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = py_hostbuf; + + if (size > size_t(len)) + throw pyopencl::error("Buffer", CL_INVALID_VALUE, + "specified size is greater than host buffer size"); + if (size == 0) + size = len; + } +#endif + + cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new buffer(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + // }}} + + // {{{ buffer transfers + + // {{{ byte-for-byte transfers + + inline + event *enqueue_read_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + size_t device_offset, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + + void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; + len = ward->m_buf.len; +#else + py::object ward = buffer; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + device_offset, len, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + size_t device_offset, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; + len = ward->m_buf.len; +#else + py::object ward = buffer; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + device_offset, len, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_buffer( + command_queue &cq, + 
memory_object_holder &src, + memory_object_holder &dst, + ptrdiff_t byte_count, + size_t src_offset, + size_t dst_offset, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (byte_count < 0) + { + size_t byte_count_src = 0; + size_t byte_count_dst = 0; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0)); + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0)); + byte_count = std::min(byte_count_src, byte_count_dst); + } + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, ( + cq.data(), + src.data(), dst.data(), + src_offset, dst_offset, + byte_count, + PYOPENCL_WAITLIST_ARGS, + &evt + )) + ); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + // }}} + + // {{{ rectangular transfers +#if PYOPENCL_CL_VERSION >= 0x1010 + inline + event *enqueue_read_buffer_rect( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::sequence py_buffer_pitches, + py::sequence py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, + 
buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_buffer_rect( + command_queue &cq, + memory_object_holder &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::sequence py_buffer_pitches, + py::sequence py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + const void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, + buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_buffer_rect( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dst, + py::object py_src_origin, + py::object py_dst_origin, + py::object py_region, + py::sequence py_src_pitches, + py::sequence py_dst_pitches, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(src_origin); + COPY_PY_COORD_TRIPLE(dst_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(src_pitches); + COPY_PY_PITCH_TUPLE(dst_pitches); + + cl_event evt; + 
PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, ( + cq.data(), + src.data(), dst.data(), + src_origin, dst_origin, region, + src_pitches[0], src_pitches[1], + dst_pitches[0], dst_pitches[1], + PYOPENCL_WAITLIST_ARGS, + &evt + )) + ); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + +#endif + + // }}} + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_fill_buffer( + command_queue &cq, + memory_object_holder &mem, + py::object pattern, + size_t offset, + size_t size, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + const void *pattern_buf; + PYOPENCL_BUFFER_SIZE_T pattern_len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS); + + pattern_buf = ward->m_buf.buf; + pattern_len = ward->m_buf.len; +#else + if (PyObject_AsReadBuffer(pattern.ptr(), &pattern_buf, &pattern_len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueFillBuffer, ( + cq.data(), + mem.data(), + pattern_buf, pattern_len, offset, size, + PYOPENCL_WAITLIST_ARGS, &evt + )) + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + + // {{{ image + + class image : public memory_object + { + public: + image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) + { } + + py::object get_image_info(cl_image_info param_name) const + { + switch (param_name) + { + case CL_IMAGE_FORMAT: + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, + cl_image_format); + case CL_IMAGE_ELEMENT_SIZE: + case CL_IMAGE_ROW_PITCH: + case CL_IMAGE_SLICE_PITCH: + case CL_IMAGE_WIDTH: + case CL_IMAGE_HEIGHT: + case CL_IMAGE_DEPTH: +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_IMAGE_ARRAY_SIZE: +#endif + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, size_t); + +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_IMAGE_BUFFER: 
+ { + cl_mem param_value; + PYOPENCL_CALL_GUARDED(clGetImageInfo, \ + (data(), param_name, sizeof(param_value), ¶m_value, 0)); + if (param_value == 0) + { + // no associated memory object? no problem. + return py::none(); + } + + return create_mem_object_wrapper(param_value, /* retain */ true); + } + + case CL_IMAGE_NUM_MIP_LEVELS: + case CL_IMAGE_NUM_SAMPLES: + PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, cl_uint); +#endif + + default: + throw error("MemoryObject.get_image_info", CL_INVALID_VALUE); + } + } + }; + + + + + // {{{ image formats + + inline + cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp) + { + std::unique_ptr result(new cl_image_format); + result->image_channel_order = ord; + result->image_channel_data_type = tp; + return result.release(); + } + + inline + py::list get_supported_image_formats( + context const &ctx, + cl_mem_flags flags, + cl_mem_object_type image_type) + { + cl_uint num_image_formats; + PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( + ctx.data(), flags, image_type, + 0, nullptr, &num_image_formats)); + + std::vector formats(num_image_formats); + PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, ( + ctx.data(), flags, image_type, + formats.size(), formats.empty( ) ? 
nullptr : &formats.front(), nullptr)); + + PYOPENCL_RETURN_VECTOR(cl_image_format, formats); + } + + inline + cl_uint get_image_format_channel_count(cl_image_format const &fmt) + { + switch (fmt.image_channel_order) + { + case CL_R: return 1; + case CL_A: return 1; + case CL_RG: return 2; + case CL_RA: return 2; + case CL_RGB: return 3; + case CL_RGBA: return 4; + case CL_BGRA: return 4; + case CL_INTENSITY: return 1; + case CL_LUMINANCE: return 1; + default: + throw pyopencl::error("ImageFormat.channel_dtype_size", + CL_INVALID_VALUE, + "unrecognized channel order"); + } + } + + inline + cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt) + { + switch (fmt.image_channel_data_type) + { + case CL_SNORM_INT8: return 1; + case CL_SNORM_INT16: return 2; + case CL_UNORM_INT8: return 1; + case CL_UNORM_INT16: return 2; + case CL_UNORM_SHORT_565: return 2; + case CL_UNORM_SHORT_555: return 2; + case CL_UNORM_INT_101010: return 4; + case CL_SIGNED_INT8: return 1; + case CL_SIGNED_INT16: return 2; + case CL_SIGNED_INT32: return 4; + case CL_UNSIGNED_INT8: return 1; + case CL_UNSIGNED_INT16: return 2; + case CL_UNSIGNED_INT32: return 4; + case CL_HALF_FLOAT: return 2; + case CL_FLOAT: return 4; + default: + throw pyopencl::error("ImageFormat.channel_dtype_size", + CL_INVALID_VALUE, + "unrecognized channel data type"); + } + } + + inline + cl_uint get_image_format_item_size(cl_image_format const &fmt) + { + return get_image_format_channel_count(fmt) + * get_image_format_channel_dtype_size(fmt); + } + + // }}} + + // {{{ image creation + + inline + image *create_image( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + py::sequence shape, + py::sequence pitches, + py::object buffer) + { + if (shape.ptr() == Py_None) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "'shape' must be given"); + + void *buf = 0; + PYOPENCL_BUFFER_SIZE_T len = 0; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr retained_buf_obj; + if 
(buffer.ptr() != Py_None) + { + retained_buf_obj = std::unique_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(buffer.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + len = retained_buf_obj->m_buf.len; + } +#else + py::object retained_buf_obj; + if (buffer.ptr() != Py_None) + { + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + buffer.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = buffer; + } +#endif + + unsigned dims = py::len(shape); + cl_int status_code; + cl_mem mem; + if (dims == 2) + { + size_t width = (shape[0]).cast(); + size_t height = (shape[1]).cast(); + + size_t pitch = 0; + if (pitches.ptr() != Py_None) + { + if (py::len(pitches) != 1) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid length of pitch tuple"); + pitch = (pitches[0]).cast(); + } + + // check buffer size + cl_int itemsize = get_image_format_item_size(fmt); + if (buf && std::max(pitch, width*itemsize)*height > cl_uint(len)) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "buffer too small"); + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage2D"); + PYOPENCL_RETRY_IF_MEM_ERROR( + { + mem = clCreateImage2D(ctx.data(), flags, &fmt, + width, height, pitch, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage2D", status_code); + } ); + + } + else if (dims == 3) + { + size_t width = (shape[0]).cast(); + size_t height = (shape[1]).cast(); + size_t depth = (shape[2]).cast(); + + size_t pitch_x = 0; + size_t pitch_y = 0; + + if (pitches.ptr() != Py_None) + { + if (py::len(pitches) 
!= 2) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid length of pitch tuple"); + + pitch_x = (pitches[0]).cast(); + pitch_y = (pitches[1]).cast(); + } + + // check buffer size + cl_int itemsize = get_image_format_item_size(fmt); + if (buf && + std::max(std::max(pitch_x, width*itemsize)*height, pitch_y) + * depth > cl_uint(len)) + throw pyopencl::error("Image", CL_INVALID_VALUE, + "buffer too small"); + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage3D"); + PYOPENCL_RETRY_IF_MEM_ERROR( + { + mem = clCreateImage3D(ctx.data(), flags, &fmt, + width, height, depth, pitch_x, pitch_y, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage3D", status_code); + } ); + } + else + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid dimension"); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + + inline + image *create_image_from_desc( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + cl_image_desc &desc, + py::object buffer) + { + if (buffer.ptr() != Py_None && + !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, " + "but no memory flags to make use of it."); + + void *buf = 0; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr retained_buf_obj; + if (buffer.ptr() != Py_None) + { + retained_buf_obj = std::unique_ptr(new py_buffer_wrapper); + + int py_buf_flags = PyBUF_ANY_CONTIGUOUS; + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + py_buf_flags |= PyBUF_WRITABLE; + + retained_buf_obj->get(buffer.ptr(), py_buf_flags); + + buf = retained_buf_obj->m_buf.buf; + } +#else + py::object retained_buf_obj; + PYOPENCL_BUFFER_SIZE_T len; + if (buffer.ptr() != Py_None) + { + if ((flags & CL_MEM_USE_HOST_PTR) + && ((flags & CL_MEM_READ_WRITE) + || (flags & CL_MEM_WRITE_ONLY))) + { + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + } + else + { + if (PyObject_AsReadBuffer( + buffer.ptr(), const_cast(&buf), &len)) + throw py::error_already_set(); + } + + if (flags & CL_MEM_USE_HOST_PTR) + retained_buf_obj = buffer; + } +#endif + + PYOPENCL_PRINT_CALL_TRACE("clCreateImage"); + cl_int status_code; + cl_mem mem = clCreateImage(ctx.data(), flags, &fmt, &desc, buf, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateImage", status_code); + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + if (!(flags & CL_MEM_USE_HOST_PTR)) + retained_buf_obj.reset(); +#endif + + try + { + return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj)); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + +#endif + + // }}} + + // {{{ image transfers + + inline + event *enqueue_read_image( + command_queue &cq, + image &img, + py::object py_origin, py::object py_region, + py::object buffer, + size_t row_pitch, size_t slice_pitch, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueReadImage, ( + cq.data(), + img.data(), + PYOPENCL_CAST_BOOL(is_blocking), + origin, region, row_pitch, slice_pitch, buf, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_write_image( + command_queue &cq, + image &img, + py::object py_origin, py::object py_region, + py::object buffer, + size_t row_pitch, size_t slice_pitch, + py::object py_wait_for, + bool is_blocking) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + const void *buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = ward->m_buf.buf; +#else + py::object ward = buffer; + PYOPENCL_BUFFER_SIZE_T len; + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueWriteImage, ( + cq.data(), + img.data(), + PYOPENCL_CAST_BOOL(is_blocking), + origin, region, row_pitch, slice_pitch, buf, + 
PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward); + } + + + + + inline + event *enqueue_copy_image( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + py::object py_src_origin, + py::object py_dest_origin, + py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(src_origin); + COPY_PY_COORD_TRIPLE(dest_origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyImage, ( + cq.data(), src.data(), dest.data(), + src_origin, dest_origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_copy_image_to_buffer( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + py::object py_origin, + py::object py_region, + size_t offset, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyImageToBuffer, ( + cq.data(), src.data(), dest.data(), + origin, region, offset, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_copy_buffer_to_image( + command_queue &cq, + memory_object_holder &src, + memory_object_holder &dest, + size_t offset, + py::object py_origin, + py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferToImage, ( + cq.data(), src.data(), dest.data(), + offset, origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + event *enqueue_fill_image( + command_queue &cq, + memory_object_holder &mem, + 
py::object color, + py::object py_origin, py::object py_region, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + const void *color_buf; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward(new py_buffer_wrapper); + + ward->get(color.ptr(), PyBUF_ANY_CONTIGUOUS); + + color_buf = ward->m_buf.buf; +#else + PYOPENCL_BUFFER_SIZE_T color_len; + if (PyObject_AsReadBuffer(color.ptr(), &color_buf, &color_len)) + throw py::error_already_set(); +#endif + + cl_event evt; + PYOPENCL_RETRY_IF_MEM_ERROR( + PYOPENCL_CALL_GUARDED(clEnqueueFillImage, ( + cq.data(), + mem.data(), + color_buf, origin, region, + PYOPENCL_WAITLIST_ARGS, &evt + )); + ); + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + + // {{{ maps + class memory_map + { + private: + bool m_valid; + std::shared_ptr m_queue; + memory_object m_mem; + void *m_ptr; + + public: + memory_map(std::shared_ptr cq, memory_object const &mem, void *ptr) + : m_valid(true), m_queue(cq), m_mem(mem), m_ptr(ptr) + { + } + + ~memory_map() + { + if (m_valid) + delete release(0, py::none()); + } + + event *release(command_queue *cq, py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (cq == 0) + cq = m_queue.get(); + + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueUnmapMemObject, ( + cq->data(), m_mem.data(), m_ptr, + PYOPENCL_WAITLIST_ARGS, &evt + )); + + m_valid = false; + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + }; + + + + + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION + inline + py::object enqueue_map_buffer( + std::shared_ptr cq, + memory_object_holder &buf, + cl_map_flags flags, + size_t offset, + py::object py_shape, py::object dtype, + py::object py_order, py::object py_strides, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; + + npy_uintp size_in_bytes = tp_descr->elsize; + for (npy_intp sdim: shape) + size_in_bytes *= sdim; + + py::object 
result; + + cl_event evt; + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapBuffer"); + void *mapped; + + PYOPENCL_RETRY_IF_MEM_ERROR( + { + { + py::gil_scoped_release release; + mapped = clEnqueueMapBuffer( + cq->data(), buf.data(), + PYOPENCL_CAST_BOOL(is_blocking), flags, + offset, size_in_bytes, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + } + if (status_code != CL_SUCCESS) + throw pyopencl::error("clEnqueueMapBuffer", status_code); + } ); + + event evt_handle(evt, false); + + std::unique_ptr map; + try + { + result = py::object(py::reinterpret_steal(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + shape.size(), + shape.empty() ? nullptr : &shape.front(), + strides.empty() ? nullptr : &strides.front(), + mapped, ary_flags, /*obj*/nullptr))); + + if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.ptr())) + throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE, + "miscalculated numpy array size (not contiguous?)"); + + map = std::unique_ptr(new memory_map(cq, buf, mapped)); + } + catch (...) 
+ { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( + cq->data(), buf.data(), mapped, 0, 0, 0)); + throw; + } + + py::object map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.ptr()) = map_py.ptr(); + Py_INCREF(map_py.ptr()); + + return py::make_tuple( + result, + handle_from_new_ptr(new event(evt_handle))); + } +#endif + + + + + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION + inline + py::object enqueue_map_image( + std::shared_ptr cq, + memory_object_holder &img, + cl_map_flags flags, + py::object py_origin, + py::object py_region, + py::object py_shape, py::object dtype, + py::object py_order, py::object py_strides, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + PYOPENCL_PARSE_NUMPY_ARRAY_SPEC; + COPY_PY_COORD_TRIPLE(origin); + COPY_PY_REGION_TRIPLE(region); + + cl_event evt; + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapImage"); + size_t row_pitch, slice_pitch; + void *mapped; + PYOPENCL_RETRY_IF_MEM_ERROR( + { + { + py::gil_scoped_release release; + mapped = clEnqueueMapImage( + cq->data(), img.data(), + PYOPENCL_CAST_BOOL(is_blocking), flags, + origin, region, &row_pitch, &slice_pitch, + PYOPENCL_WAITLIST_ARGS, &evt, + &status_code); + } + if (status_code != CL_SUCCESS) + throw pyopencl::error("clEnqueueMapImage", status_code); + } ); + + event evt_handle(evt, false); + + std::unique_ptr map; + try + { + map = std::unique_ptr(new memory_map(cq, img, mapped)); + } + catch (...) + { + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, ( + cq->data(), img.data(), mapped, 0, 0, 0)); + throw; + } + + py::object result = py::reinterpret_steal(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + shape.size(), + shape.empty() ? nullptr : &shape.front(), + strides.empty() ? 
nullptr : &strides.front(), + mapped, ary_flags, /*obj*/nullptr)); + + py::object map_py(handle_from_new_ptr(map.release())); + PyArray_BASE(result.ptr()) = map_py.ptr(); + Py_INCREF(map_py.ptr()); + + return py::make_tuple( + result, + handle_from_new_ptr(new event(evt_handle)), + row_pitch, slice_pitch); + } +#endif + + // }}} + + + // {{{ svm + +#if PYOPENCL_CL_VERSION >= 0x2000 + + class svm_arg_wrapper + { + private: + void *m_ptr; + PYOPENCL_BUFFER_SIZE_T m_size; +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr ward; +#endif + + public: + svm_arg_wrapper(py::object holder) + { +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + ward = std::unique_ptr(new py_buffer_wrapper); +#ifdef PYPY_VERSION + // FIXME: get a read-only buffer + // Not quite honest, but Pypy doesn't consider numpy arrays + // created from objects with the __aray_interface__ writeable. + ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS); +#else + ward->get(holder.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE); +#endif + m_ptr = ward->m_buf.buf; + m_size = ward->m_buf.len; +#else + py::object ward = holder; + if (PyObject_AsWriteBuffer(holder.ptr(), &m_ptr, &m_size)) + throw py::error_already_set(); +#endif + } + + void *ptr() const + { + return m_ptr; + } + size_t size() const + { + return m_size; + } + }; + + + class svm_allocation : noncopyable + { + private: + std::shared_ptr m_context; + void *m_allocation; + + public: + svm_allocation(std::shared_ptr const &ctx, size_t size, cl_uint alignment, cl_svm_mem_flags flags) + : m_context(ctx) + { + PYOPENCL_PRINT_CALL_TRACE("clSVMalloc"); + m_allocation = clSVMAlloc( + ctx->data(), + flags, size, alignment); + + if (!m_allocation) + throw pyopencl::error("clSVMAlloc", CL_OUT_OF_RESOURCES); + } + + ~svm_allocation() + { + if (m_allocation) + release(); + } + + void release() + { + if (!m_allocation) + throw error("SVMAllocation.release", CL_INVALID_VALUE, + "trying to double-unref svm allocation"); + + clSVMFree(m_context->data(), m_allocation); 
+ m_allocation = nullptr; + } + + void enqueue_release(command_queue &queue, py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (!m_allocation) + throw error("SVMAllocation.release", CL_INVALID_VALUE, + "trying to double-unref svm allocation"); + + cl_event evt; + + PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueSVMFree, ( + queue.data(), 1, &m_allocation, + nullptr, nullptr, + PYOPENCL_WAITLIST_ARGS, &evt)); + + m_allocation = nullptr; + } + + void *ptr() const + { + return m_allocation; + } + + intptr_t ptr_as_int() const + { + return (intptr_t) m_allocation; + } + + bool operator==(svm_allocation const &other) const + { + return m_allocation == other.m_allocation; + } + + bool operator!=(svm_allocation const &other) const + { + return m_allocation != other.m_allocation; + } + }; + + + inline + event *enqueue_svm_memcpy( + command_queue &cq, + cl_bool is_blocking, + svm_arg_wrapper &dst, svm_arg_wrapper &src, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + if (src.size() != dst.size()) + throw error("_enqueue_svm_memcpy", CL_INVALID_VALUE, + "sizes of source and destination buffer do not match"); + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMemcpy, + ( + cq.data(), + is_blocking, + dst.ptr(), src.ptr(), + dst.size(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_memfill( + command_queue &cq, + svm_arg_wrapper &dst, py::object py_pattern, + py::object byte_count, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + const void *pattern_ptr; + PYOPENCL_BUFFER_SIZE_T pattern_len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + std::unique_ptr pattern_ward(new py_buffer_wrapper); + + pattern_ward->get(py_pattern.ptr(), PyBUF_ANY_CONTIGUOUS); + + pattern_ptr = pattern_ward->m_buf.buf; + pattern_len = pattern_ward->m_buf.len; +#else + py::object pattern_ward = py_pattern; + if (PyObject_AsReadBuffer(py_pattern.ptr(), &pattern_ptr, &pattern_len)) + throw 
py::error_already_set(); +#endif + + size_t fill_size = dst.size(); + if (!byte_count.is_none()) + fill_size = py::cast(byte_count); + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMemFill, + ( + cq.data(), + dst.ptr(), pattern_ptr, + pattern_len, + fill_size, + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_map( + command_queue &cq, + cl_bool is_blocking, + cl_map_flags flags, + svm_arg_wrapper &svm, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMap, + ( + cq.data(), + is_blocking, + flags, + svm.ptr(), svm.size(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + inline + event *enqueue_svm_unmap( + command_queue &cq, + svm_arg_wrapper &svm, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMUnmap, + ( + cq.data(), + svm.ptr(), + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + +#if PYOPENCL_CL_VERSION >= 0x2010 + inline + event *enqueue_svm_migratemem( + command_queue &cq, + py::sequence svms, + cl_mem_migration_flags flags, + py::object py_wait_for + ) + { + PYOPENCL_PARSE_WAIT_FOR; + + std::vector svm_pointers; + std::vector sizes; + + for (py::handle py_svm: svms) + { + svm_arg_wrapper &svm(py::cast(py_svm)); + + svm_pointers.push_back(svm.ptr()); + sizes.push_back(svm.size()); + } + + cl_event evt; + PYOPENCL_CALL_GUARDED( + clEnqueueSVMMigrateMem, + ( + cq.data(), + svm_pointers.size(), + svm_pointers.empty() ? nullptr : &svm_pointers.front(), + sizes.empty() ? 
nullptr : &sizes.front(), + flags, + PYOPENCL_WAITLIST_ARGS, + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } +#endif + + // }}} + + + // {{{ sampler + + class sampler : noncopyable + { + private: + cl_sampler m_sampler; + + public: +#if PYOPENCL_CL_VERSION >= 0x2000 + sampler(context const &ctx, py::sequence py_props) + { + int hex_plat_version = ctx.get_hex_platform_version(); + + if (hex_plat_version < 0x2000) + { + std::cerr << + "sampler properties given as an iterable, " + "which uses an OpenCL 2+-only interface, " + "but the context's platform does not " + "declare OpenCL 2 support. Proceeding " + "as requested, but the next thing you see " + "may be a crash." << std:: endl; + } + + cl_sampler_properties props[py::len(py_props) + 1]; + { + size_t i = 0; + for (auto prop: py_props) + props[i++] = py::cast(prop); + props[i++] = 0; + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); + + m_sampler = clCreateSamplerWithProperties( + ctx.data(), + props, + &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } +#endif + + sampler(context const &ctx, bool normalized_coordinates, + cl_addressing_mode am, cl_filter_mode fm) + { + PYOPENCL_PRINT_CALL_TRACE("clCreateSampler"); + + int hex_plat_version = ctx.get_hex_platform_version(); +#if PYOPENCL_CL_VERSION >= 0x2000 + if (hex_plat_version >= 0x2000) + { + cl_sampler_properties props_list[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coordinates, + CL_SAMPLER_ADDRESSING_MODE, am, + CL_SAMPLER_FILTER_MODE, fm, + 0, + }; + + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateSamplerWithProperties"); + m_sampler = clCreateSamplerWithProperties( + ctx.data(), props_list, &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } + else +#endif + { + cl_int status_code; + +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored 
"-Wdeprecated-declarations" +#endif + m_sampler = clCreateSampler( + ctx.data(), + normalized_coordinates, + am, fm, &status_code); +#if defined(__GNUG__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + + if (status_code != CL_SUCCESS) + throw pyopencl::error("Sampler", status_code); + } + } + + sampler(cl_sampler samp, bool retain) + : m_sampler(samp) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainSampler, (samp)); + } + + ~sampler() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseSampler, (m_sampler)); + } + + cl_sampler data() const + { + return m_sampler; + } + + PYOPENCL_EQUALITY_TESTS(sampler); + + py::object get_info(cl_sampler_info param_name) const + { + switch (param_name) + { + case CL_SAMPLER_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_uint); + case CL_SAMPLER_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Sampler, m_sampler, param_name, + cl_context, context); + case CL_SAMPLER_ADDRESSING_MODE: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_addressing_mode); + case CL_SAMPLER_FILTER_MODE: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_filter_mode); + case CL_SAMPLER_NORMALIZED_COORDS: + PYOPENCL_GET_INTEGRAL_INFO(Sampler, m_sampler, param_name, + cl_bool); + + default: + throw error("Sampler.get_info", CL_INVALID_VALUE); + } + } + }; + + // }}} + + + // {{{ program + + class program : noncopyable + { + public: + enum program_kind_type { KND_UNKNOWN, KND_SOURCE, KND_BINARY }; + + private: + cl_program m_program; + program_kind_type m_program_kind; + + public: + program(cl_program prog, bool retain, program_kind_type progkind=KND_UNKNOWN) + : m_program(prog), m_program_kind(progkind) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainProgram, (prog)); + } + + ~program() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program)); + } + + cl_program data() const + { + return m_program; + } + + program_kind_type kind() const + { + return m_program_kind; + } + + 
PYOPENCL_EQUALITY_TESTS(program); + + py::object get_info(cl_program_info param_name) const + { + switch (param_name) + { + case CL_PROGRAM_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + cl_uint); + case CL_PROGRAM_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name, + cl_context, context); + case CL_PROGRAM_NUM_DEVICES: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + cl_uint); + case CL_PROGRAM_DEVICES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); + + py::list py_result; + for (cl_device_id did: result) + py_result.append(handle_from_new_ptr( + new pyopencl::device(did))); + return py_result; + } + case CL_PROGRAM_SOURCE: + PYOPENCL_GET_STR_INFO(Program, m_program, param_name); + case CL_PROGRAM_BINARY_SIZES: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result); + PYOPENCL_RETURN_VECTOR(size_t, result); + } + case CL_PROGRAM_BINARIES: + // {{{ + { + std::vector sizes; + PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes); + + size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); + + std::unique_ptr result( + new unsigned char[total_size]); + std::vector result_ptrs; + + unsigned char *ptr = result.get(); + for (unsigned i = 0; i < sizes.size(); ++i) + { + result_ptrs.push_back(ptr); + ptr += sizes[i]; + } + + PYOPENCL_CALL_GUARDED(clGetProgramInfo, + (m_program, param_name, sizes.size()*sizeof(unsigned char *), + result_ptrs.empty( ) ? 
nullptr : &result_ptrs.front(), 0)); \ + + py::list py_result; + ptr = result.get(); + for (unsigned i = 0; i < sizes.size(); ++i) + { + py::object binary_pyobj( + py::reinterpret_steal( +#if PY_VERSION_HEX >= 0x03000000 + PyBytes_FromStringAndSize( + reinterpret_cast(ptr), sizes[i]) +#else + PyString_FromStringAndSize( + reinterpret_cast(ptr), sizes[i]) +#endif + )); + py_result.append(binary_pyobj); + ptr += sizes[i]; + } + return py_result; + } + // }}} +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_PROGRAM_NUM_KERNELS: + PYOPENCL_GET_INTEGRAL_INFO(Program, m_program, param_name, + size_t); + case CL_PROGRAM_KERNEL_NAMES: + PYOPENCL_GET_STR_INFO(Program, m_program, param_name); +#endif + + default: + throw error("Program.get_info", CL_INVALID_VALUE); + } + } + + py::object get_build_info( + device const &dev, + cl_program_build_info param_name) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack + case CL_PROGRAM_BUILD_STATUS: + PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + cl_build_status); + case CL_PROGRAM_BUILD_OPTIONS: + case CL_PROGRAM_BUILD_LOG: + PYOPENCL_GET_STR_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name); +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_PROGRAM_BINARY_TYPE: + PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + cl_program_binary_type); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: + PYOPENCL_GET_INTEGRAL_INFO(ProgramBuild, + PYOPENCL_FIRST_ARG, param_name, + size_t); +#endif +#undef PYOPENCL_FIRST_ARG + + default: + throw error("Program.get_build_info", CL_INVALID_VALUE); + } + } + + void build(std::string options, py::object py_devices) + { + PYOPENCL_PARSE_PY_DEVICES; + + PYOPENCL_CALL_GUARDED_THREADED(clBuildProgram, + (m_program, num_devices, devices, + options.c_str(), 0 ,0)); + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + void compile(std::string options, py::object py_devices, + 
py::object py_headers) + { + PYOPENCL_PARSE_PY_DEVICES; + + // {{{ pick apart py_headers + // py_headers is a list of tuples *(name, program)* + + std::vector header_names; + std::vector programs; + for (py::handle name_hdr_tup_py: py_headers) + { + py::tuple name_hdr_tup = py::reinterpret_borrow(name_hdr_tup_py); + if (py::len(name_hdr_tup) != 2) + throw error("Program.compile", CL_INVALID_VALUE, + "epxected (name, header) tuple in headers list"); + std::string name = (name_hdr_tup[0]).cast(); + program &prg = (name_hdr_tup[1]).cast(); + + header_names.push_back(name); + programs.push_back(prg.data()); + } + + std::vector header_name_ptrs; + for (std::string const &name: header_names) + header_name_ptrs.push_back(name.c_str()); + + // }}} + + PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram, + (m_program, num_devices, devices, + options.c_str(), header_names.size(), + programs.empty() ? nullptr : &programs.front(), + header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(), + 0, 0)); + } +#endif + }; + + + + + inline + program *create_program_with_source( + context &ctx, + std::string const &src) + { + const char *string = src.c_str(); + size_t length = src.size(); + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource"); + cl_program result = clCreateProgramWithSource( + ctx.data(), 1, &string, &length, &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithSource", status_code); + + try + { + return new program(result, false, program::KND_SOURCE); + } + catch (...) 
+ { + clReleaseProgram(result); + throw; + } + } + + + + + + inline + program *create_program_with_binary( + context &ctx, + py::sequence py_devices, + py::sequence py_binaries) + { + std::vector devices; + std::vector binaries; + std::vector sizes; + + size_t num_devices = len(py_devices); + if (len(py_binaries) != num_devices) + throw error("create_program_with_binary", CL_INVALID_VALUE, + "device and binary counts don't match"); + + for (size_t i = 0; i < num_devices; ++i) + { + devices.push_back( + (py_devices[i]).cast().data()); + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + py_buffer_wrapper buf_wrapper; + + buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS); + + buf = buf_wrapper.m_buf.buf; + len = buf_wrapper.m_buf.len; +#else + if (PyObject_AsReadBuffer( + py::object(py_binaries[i]).ptr(), &buf, &len)) + throw py::error_already_set(); +#endif + + binaries.push_back(reinterpret_cast(buf)); + sizes.push_back(len); + } + + cl_int binary_statuses[num_devices]; + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary"); + cl_program result = clCreateProgramWithBinary( + ctx.data(), num_devices, + devices.empty( ) ? nullptr : &devices.front(), + sizes.empty( ) ? nullptr : &sizes.front(), + binaries.empty( ) ? nullptr : &binaries.front(), + binary_statuses, + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithBinary", status_code); + + /* + for (int i = 0; i < num_devices; ++i) + printf("%d:%d\n", i, binary_statuses[i]); + */ + + try + { + return new program(result, false, program::KND_BINARY); + } + catch (...) 
+ { + clReleaseProgram(result); + throw; + } + } + + + +#if (PYOPENCL_CL_VERSION >= 0x1020) && \ + ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) + inline + program *create_program_with_built_in_kernels( + context &ctx, + py::object py_devices, + std::string const &kernel_names) + { + PYOPENCL_PARSE_PY_DEVICES; + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels"); + cl_program result = clCreateProgramWithBuiltInKernels( + ctx.data(), num_devices, devices, + kernel_names.c_str(), &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code); + + try + { + return new program(result, false); + } + catch (...) + { + clReleaseProgram(result); + throw; + } + } +#endif + + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + program *link_program( + context &ctx, + py::object py_programs, + std::string const &options, + py::object py_devices + ) + { + PYOPENCL_PARSE_PY_DEVICES; + + std::vector programs; + for (py::handle py_prg: py_programs) + { + program &prg = (py_prg).cast(); + programs.push_back(prg.data()); + } + + cl_int status_code; + PYOPENCL_PRINT_CALL_TRACE("clLinkProgram"); + cl_program result = clLinkProgram( + ctx.data(), num_devices, devices, + options.c_str(), + programs.size(), + programs.empty() ? nullptr : &programs.front(), + 0, 0, + &status_code); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("clLinkPorgram", status_code); + + try + { + return new program(result, false); + } + catch (...) 
+ { + clReleaseProgram(result); + throw; + } + } + +#endif + + +#if PYOPENCL_CL_VERSION >= 0x1020 + inline + void unload_platform_compiler(platform &plat) + { + PYOPENCL_CALL_GUARDED(clUnloadPlatformCompiler, (plat.data())); + } +#endif + + // }}} + + + // {{{ kernel + class local_memory + { + private: + size_t m_size; + + public: + local_memory(size_t size) + : m_size(size) + { } + + size_t size() const + { return m_size; } + }; + + + + + class kernel : noncopyable + { + private: + cl_kernel m_kernel; + + public: + kernel(cl_kernel knl, bool retain) + : m_kernel(knl) + { + if (retain) + PYOPENCL_CALL_GUARDED(clRetainKernel, (knl)); + } + + kernel(program const &prg, std::string const &kernel_name) + { + cl_int status_code; + + PYOPENCL_PRINT_CALL_TRACE("clCreateKernel"); + m_kernel = clCreateKernel(prg.data(), kernel_name.c_str(), + &status_code); + if (status_code != CL_SUCCESS) + throw pyopencl::error("clCreateKernel", status_code); + } + + ~kernel() + { + PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel)); + } + + cl_kernel data() const + { + return m_kernel; + } + + PYOPENCL_EQUALITY_TESTS(kernel); + + void set_arg_null(cl_uint arg_index) + { + cl_mem m = 0; + PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, + sizeof(cl_mem), &m)); + } + + void set_arg_mem(cl_uint arg_index, memory_object_holder &moh) + { + cl_mem m = moh.data(); + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, sizeof(cl_mem), &m)); + } + + void set_arg_local(cl_uint arg_index, local_memory const &loc) + { + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, loc.size(), 0)); + } + + void set_arg_sampler(cl_uint arg_index, sampler const &smp) + { + cl_sampler s = smp.data(); + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, sizeof(cl_sampler), &s)); + } + + void set_arg_buf(cl_uint arg_index, py::object py_buffer) + { + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + +#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE + py_buffer_wrapper 
buf_wrapper; + + try + { + buf_wrapper.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS); + } + catch (py::error_already_set &) + { + PyErr_Clear(); + throw error("Kernel.set_arg", CL_INVALID_VALUE, + "invalid kernel argument"); + } + + buf = buf_wrapper.m_buf.buf; + len = buf_wrapper.m_buf.len; +#else + if (PyObject_AsReadBuffer(py_buffer.ptr(), &buf, &len)) + { + PyErr_Clear(); + throw error("Kernel.set_arg", CL_INVALID_VALUE, + "invalid kernel argument"); + } +#endif + + PYOPENCL_CALL_GUARDED(clSetKernelArg, + (m_kernel, arg_index, len, buf)); + } + +#if PYOPENCL_CL_VERSION >= 0x2000 + void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp) + { + PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer, + (m_kernel, arg_index, wrp.ptr())); + } +#endif + + void set_arg(cl_uint arg_index, py::object arg) + { + if (arg.ptr() == Py_None) + { + set_arg_null(arg_index); + return; + } + + try + { + set_arg_mem(arg_index, arg.cast()); + return; + } + catch (py::cast_error &) { } + +#if PYOPENCL_CL_VERSION >= 0x2000 + try + { + set_arg_svm(arg_index, arg.cast()); + return; + } + catch (py::cast_error &) { } +#endif + + try + { + set_arg_local(arg_index, arg.cast()); + return; + } + catch (py::cast_error &) { } + + try + { + set_arg_sampler(arg_index, arg.cast()); + return; + } + catch (py::cast_error &) { } + + set_arg_buf(arg_index, arg); + } + + py::object get_info(cl_kernel_info param_name) const + { + switch (param_name) + { + case CL_KERNEL_FUNCTION_NAME: + PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); + case CL_KERNEL_NUM_ARGS: + case CL_KERNEL_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name, + cl_uint); + case CL_KERNEL_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, + cl_context, context); + case CL_KERNEL_PROGRAM: + PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name, + cl_program, program); +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_KERNEL_ATTRIBUTES: + PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name); +#endif + 
default: + throw error("Kernel.get_info", CL_INVALID_VALUE); + } + } + + py::object get_work_group_info( + cl_kernel_work_group_info param_name, + device const &dev + ) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack + case CL_KERNEL_WORK_GROUP_SIZE: + PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + size_t); + case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: + { + std::vector result; + PYOPENCL_GET_VEC_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, result); + + PYOPENCL_RETURN_VECTOR(size_t, result); + } + case CL_KERNEL_LOCAL_MEM_SIZE: +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_KERNEL_PRIVATE_MEM_SIZE: +#endif + PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + cl_ulong); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup, + PYOPENCL_FIRST_ARG, param_name, + size_t); +#endif + default: + throw error("Kernel.get_work_group_info", CL_INVALID_VALUE); +#undef PYOPENCL_FIRST_ARG + } + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + py::object get_arg_info( + cl_uint arg_index, + cl_kernel_arg_info param_name + ) const + { + switch (param_name) + { +#define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack + case CL_KERNEL_ARG_ADDRESS_QUALIFIER: + PYOPENCL_GET_INTEGRAL_INFO(KernelArg, + PYOPENCL_FIRST_ARG, param_name, + cl_kernel_arg_address_qualifier); + + case CL_KERNEL_ARG_ACCESS_QUALIFIER: + PYOPENCL_GET_INTEGRAL_INFO(KernelArg, + PYOPENCL_FIRST_ARG, param_name, + cl_kernel_arg_access_qualifier); + + case CL_KERNEL_ARG_TYPE_NAME: + case CL_KERNEL_ARG_NAME: + PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name); +#undef PYOPENCL_FIRST_ARG + default: + throw error("Kernel.get_arg_info", CL_INVALID_VALUE); + } + } +#endif + }; + + + inline + py::list create_kernels_in_program(program &pgm) + { + cl_uint num_kernels; + 
PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( + pgm.data(), 0, 0, &num_kernels)); + + std::vector kernels(num_kernels); + PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, ( + pgm.data(), num_kernels, + kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels)); + + py::list result; + for (cl_kernel knl: kernels) + result.append(handle_from_new_ptr(new kernel(knl, true))); + + return result; + } + + + + inline + event *enqueue_nd_range_kernel( + command_queue &cq, + kernel &knl, + py::object py_global_work_size, + py::object py_local_work_size, + py::object py_global_work_offset, + py::object py_wait_for, + bool g_times_l) + { + PYOPENCL_PARSE_WAIT_FOR; + + cl_uint work_dim = len(py_global_work_size); + + std::vector global_work_size; + COPY_PY_LIST(size_t, global_work_size); + + size_t *local_work_size_ptr = 0; + std::vector local_work_size; + if (py_local_work_size.ptr() != Py_None) + { + if (g_times_l) + work_dim = std::max(work_dim, unsigned(len(py_local_work_size))); + else + if (work_dim != unsigned(len(py_local_work_size))) + throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, + "global/local work sizes have differing dimensions"); + + COPY_PY_LIST(size_t, local_work_size); + + while (local_work_size.size() < work_dim) + local_work_size.push_back(1); + while (global_work_size.size() < work_dim) + global_work_size.push_back(1); + + local_work_size_ptr = local_work_size.empty( ) ? 
nullptr : &local_work_size.front(); + } + + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_size[work_axis] *= local_work_size[work_axis]; + } + + size_t *global_work_offset_ptr = 0; + std::vector global_work_offset; + if (py_global_work_offset.ptr() != Py_None) + { + if (work_dim != unsigned(len(py_global_work_offset))) + throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE, + "global work size and offset have differing dimensions"); + + COPY_PY_LIST(size_t, global_work_offset); + + if (g_times_l && local_work_size_ptr) + { + for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis) + global_work_offset[work_axis] *= local_work_size[work_axis]; + } + + global_work_offset_ptr = global_work_offset.empty( ) ? nullptr : &global_work_offset.front(); + } + + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( { + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, ( + cq.data(), + knl.data(), + work_dim, + global_work_offset_ptr, + global_work_size.empty( ) ? 
nullptr : &global_work_size.front(), + local_work_size_ptr, + PYOPENCL_WAITLIST_ARGS, &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + } ); + } + + // }}} + + + // {{{ gl interop + inline + bool have_gl() + { +#ifdef HAVE_GL + return true; +#else + return false; +#endif + } + + + + +#ifdef HAVE_GL + +#ifdef __APPLE__ + inline + cl_context_properties get_apple_cgl_share_group() + { + CGLContextObj kCGLContext = CGLGetCurrentContext(); + CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); + + return (cl_context_properties) kCGLShareGroup; + } +#endif /* __APPLE__ */ + + + + + class gl_buffer : public memory_object + { + public: + gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) + { } + }; + + + + + class gl_renderbuffer : public memory_object + { + public: + gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) + { } + }; + + + + + class gl_texture : public image + { + public: + gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t()) + : image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf)) + { } + + py::object get_gl_texture_info(cl_gl_texture_info param_name) + { + switch (param_name) + { + case CL_GL_TEXTURE_TARGET: + PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum); + case CL_GL_MIPMAP_LEVEL: + PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint); + + default: + throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); + } + } + }; + + + + +#define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \ + inline \ + TYPE *NAME ARGS \ + { \ + cl_int status_code; \ + PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \ + cl_mem mem = CL_NAME CL_ARGS; \ + \ + if (status_code != CL_SUCCESS) \ + throw pyopencl::error(#CL_NAME, status_code); \ + \ + try \ + { \ + return new TYPE(mem, false); \ + } \ + catch (...) 
\ + { \ + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \ + throw; \ + } \ + } + + + + + PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer, + create_from_gl_buffer, clCreateFromGLBuffer, + (context &ctx, cl_mem_flags flags, GLuint bufobj), + (ctx.data(), flags, bufobj, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, + create_from_gl_texture_2d, clCreateFromGLTexture2D, + (context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, GLuint texture), + (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture, + create_from_gl_texture_3d, clCreateFromGLTexture3D, + (context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, GLuint texture), + (ctx.data(), flags, texture_target, miplevel, texture, &status_code)); + PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer, + create_from_gl_renderbuffer, clCreateFromGLRenderbuffer, + (context &ctx, cl_mem_flags flags, GLuint renderbuffer), + (ctx.data(), flags, renderbuffer, &status_code)); + + inline + gl_texture *create_from_gl_texture( + context &ctx, cl_mem_flags flags, + GLenum texture_target, GLint miplevel, + GLuint texture, unsigned dims) + { + if (dims == 2) + return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture); + else if (dims == 3) + return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture); + else + throw pyopencl::error("Image", CL_INVALID_VALUE, + "invalid dimension"); + } + + + + + + inline + py::tuple get_gl_object_info(memory_object_holder const &mem) + { + cl_gl_object_type otype; + GLuint gl_name; + PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name)); + return py::make_tuple(otype, gl_name); + } + +#define WRAP_GL_ENQUEUE(what, What) \ + inline \ + event *enqueue_##what##_gl_objects( \ + command_queue &cq, \ + py::object py_mem_objects, \ + py::object py_wait_for) \ + { \ + PYOPENCL_PARSE_WAIT_FOR; \ + \ + std::vector mem_objects; \ + for (py::handle mo: 
py_mem_objects) \ + mem_objects.push_back((mo).cast().data()); \ + \ + cl_event evt; \ + PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \ + cq.data(), \ + mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), \ + PYOPENCL_WAITLIST_ARGS, &evt \ + )); \ + \ + PYOPENCL_RETURN_NEW_EVENT(evt); \ + } + + WRAP_GL_ENQUEUE(acquire, Acquire); + WRAP_GL_ENQUEUE(release, Release); +#endif + + + + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + inline + py::object get_gl_context_info_khr( + py::object py_properties, + cl_gl_context_info param_name, + py::object py_platform + ) + { + std::vector props + = parse_context_properties(py_properties); + + typedef CL_API_ENTRY cl_int (CL_API_CALL + *func_ptr_type)(const cl_context_properties * /* properties */, + cl_gl_context_info /* param_name */, + size_t /* param_value_size */, + void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + + func_ptr_type func_ptr; + +#if PYOPENCL_CL_VERSION >= 0x1020 + if (py_platform.ptr() != Py_None) + { + platform &plat = (py_platform).cast(); + + func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform( + plat.data(), "clGetGLContextInfoKHR"); + } + else + { + PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", ); + + func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( + "clGetGLContextInfoKHR"); + } +#else + func_ptr = (func_ptr_type) clGetExtensionFunctionAddress( + "clGetGLContextInfoKHR"); +#endif + + + if (!func_ptr) + throw error("Context.get_info", CL_INVALID_PLATFORM, + "clGetGLContextInfoKHR extension function not present"); + + cl_context_properties *props_ptr + = props.empty( ) ? 
nullptr : &props.front(); + + switch (param_name) + { + case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: + { + cl_device_id param_value; + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, sizeof(param_value), &param_value, 0)); + return py::object(handle_from_new_ptr( \ + new device(param_value, /*retain*/ true))); + } + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: + { + size_t size; + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, 0, 0, &size)); + + std::vector devices; + + devices.resize(size / sizeof(devices.front())); + + PYOPENCL_CALL_GUARDED(func_ptr, + (props_ptr, param_name, size, + devices.empty( ) ? nullptr : &devices.front(), &size)); + + py::list result; + for (cl_device_id did: devices) + result.append(handle_from_new_ptr( + new device(did))); + + return result; + } + + default: + throw error("get_gl_context_info_khr", CL_INVALID_VALUE); + } + } + +#endif + + // }}} + + + // {{{ deferred implementation bits + + inline py::object create_mem_object_wrapper(cl_mem mem, bool retain=true) + { + cl_mem_object_type mem_obj_type; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ + (mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0)); + + switch (mem_obj_type) + { + case CL_MEM_OBJECT_BUFFER: + return py::object(handle_from_new_ptr( + new buffer(mem, retain))); + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: +#if PYOPENCL_CL_VERSION >= 0x1020 + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D_BUFFER: +#endif + return py::object(handle_from_new_ptr( + new image(mem, retain))); + default: + return py::object(handle_from_new_ptr( + new memory_object(mem, retain))); + } + } + + inline + py::object memory_object_from_int(intptr_t cl_mem_as_int, bool retain) + { + return create_mem_object_wrapper((cl_mem) cl_mem_as_int, retain); + } + + + inline + py::object memory_object_holder::get_info(cl_mem_info param_name) const + { + switch (param_name) + { + case CL_MEM_TYPE: + 
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_mem_object_type); + case CL_MEM_FLAGS: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_mem_flags); + case CL_MEM_SIZE: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + size_t); + case CL_MEM_HOST_PTR: + throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE, + "Use MemoryObject.get_host_array to get host pointer."); + case CL_MEM_MAP_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_uint); + case CL_MEM_REFERENCE_COUNT: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + cl_uint); + case CL_MEM_CONTEXT: + PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name, + cl_context, context); + +#if PYOPENCL_CL_VERSION >= 0x1010 + case CL_MEM_ASSOCIATED_MEMOBJECT: + { + cl_mem param_value; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \ + (data(), param_name, sizeof(param_value), &param_value, 0)); + if (param_value == 0) + { + // no associated memory object? no problem. 
+ return py::none(); + } + + return create_mem_object_wrapper(param_value); + } + case CL_MEM_OFFSET: + PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name, + size_t); +#endif + + default: + throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE); + } + } + + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION + inline + py::object get_mem_obj_host_array( + py::object mem_obj_py, + py::object shape, py::object dtype, + py::object order_py) + { + memory_object_holder const &mem_obj = + (mem_obj_py).cast(); + PyArray_Descr *tp_descr; + if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) + throw py::error_already_set(); + cl_mem_flags mem_flags; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0)); + if (!(mem_flags & CL_MEM_USE_HOST_PTR)) + throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE, + "Only MemoryObject with USE_HOST_PTR " + "is supported."); + + std::vector dims; + try + { + dims.push_back(py::cast(shape)); + } + catch (py::cast_error &) + { + for (auto it: shape) + dims.push_back(it.cast()); + } + + NPY_ORDER order = PyArray_CORDER; + PyArray_OrderConverter(order_py.ptr(), &order); + + int ary_flags = 0; + if (order == PyArray_FORTRANORDER) + ary_flags |= NPY_FARRAY; + else if (order == PyArray_CORDER) + ary_flags |= NPY_CARRAY; + else + throw std::runtime_error("unrecognized order specifier"); + + void *host_ptr; + size_t mem_obj_size; + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr), + &host_ptr, 0)); + PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, + (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size), + &mem_obj_size, 0)); + + py::object result = py::reinterpret_steal(PyArray_NewFromDescr( + &PyArray_Type, tp_descr, + dims.size(), &dims.front(), /*strides*/ nullptr, + host_ptr, ary_flags, /*obj*/nullptr)); + + if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size) + throw 
pyopencl::error("MemoryObject.get_host_array", + CL_INVALID_VALUE, + "Resulting array is larger than memory object."); + + PyArray_BASE(result.ptr()) = mem_obj_py.ptr(); + Py_INCREF(mem_obj_py.ptr()); + + return result; + } +#endif + + // }}} +} + +#endif + +// vim: foldmethod=marker diff --git a/src/wrap_cl_part_1.cpp b/src/wrap_cl_part_1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c3e6d5e81d2db08e43d60abcfa4b6c6cbfe32b3b --- /dev/null +++ b/src/wrap_cl_part_1.cpp @@ -0,0 +1,328 @@ +#include "wrap_cl.hpp" + + +using namespace pyopencl; + + +void pyopencl_expose_part_1(py::module &m) +{ + m.def("get_cl_header_version", get_cl_header_version); + m.def("_sizeof_size_t", [](){ return sizeof(size_t); }); + + // {{{ platform + DEF_SIMPLE_FUNCTION(get_platforms); + + { + typedef platform cls; + py::class_(m, "Platform", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(get_info) + .def("get_devices", &cls::get_devices, + py::arg("device_type")=CL_DEVICE_TYPE_ALL) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_platform_id) + ; + } + + // }}} + + // {{{ device + { + typedef device cls; + py::class_(m, "Device", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) +#if PYOPENCL_CL_VERSION >= 0x1020 + .DEF_SIMPLE_METHOD(create_sub_devices) +#endif + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_device_id) + ; + } + + // }}} + + // {{{ context + + { + typedef context cls; + py::class_>(m, "Context", py::dynamic_attr()) + .def( + py::init( + [](py::object py_devices, py::object py_properties, + py::object py_dev_type) + { + PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( + return create_context_inner( + py_devices, + py_properties, + py_dev_type); + ) + }), + py::arg("devices")=py::none(), + py::arg("properties")=py::none(), + py::arg("dev_type")=py::none() + ) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == 
py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_context) + ; + } + + // }}} + + // {{{ command queue + { + typedef command_queue cls; + py::class_>(m, "CommandQueue", py::dynamic_attr()) + .def( + py::init(), + py::arg("context"), + py::arg("device").none(true)=py::none(), + py::arg("properties")=py::cast(0)) + .DEF_SIMPLE_METHOD(get_info) +#if PYOPENCL_CL_VERSION < 0x1010 + .DEF_SIMPLE_METHOD(set_property) +#endif + .DEF_SIMPLE_METHOD(flush) + .DEF_SIMPLE_METHOD(finish) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_command_queue) + ; + } + + // }}} + + // {{{ events/synchronization + { + typedef event cls; + py::class_(m, "Event", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_profiling_info) + .DEF_SIMPLE_METHOD(wait) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_event) +#if PYOPENCL_CL_VERSION >= 0x1010 + .DEF_SIMPLE_METHOD(set_callback) +#endif + ; + } + { + typedef nanny_event cls; + py::class_(m, "NannyEvent", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(get_ward) + ; + } + + DEF_SIMPLE_FUNCTION(wait_for_events); + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("_enqueue_marker_with_wait_list", enqueue_marker_with_wait_list, + py::arg("queue"), py::arg("wait_for")=py::none() + ); +#endif + m.def("_enqueue_marker", enqueue_marker, + py::arg("queue") + ); + m.def("_enqueue_wait_for_events", enqueue_wait_for_events, + py::arg("queue"), py::arg("wait_for")=py::none()); + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("_enqueue_barrier_with_wait_list", enqueue_barrier_with_wait_list, + py::arg("queue"), py::arg("wait_for")=py::none() + ); +#endif + m.def("_enqueue_barrier", enqueue_barrier, py::arg("queue")); + +#if PYOPENCL_CL_VERSION >= 0x1010 + { + typedef user_event cls; + py::class_(m, "UserEvent", py::dynamic_attr()) + 
.def(py::init( + [](context &ctx) + { + return create_user_event(ctx); + }), + py::arg("context")) + .DEF_SIMPLE_METHOD(set_status) + ; + } +#endif + + // }}} + + // {{{ memory_object + + { + typedef memory_object_holder cls; + py::class_(m, "MemoryObjectHolder", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(get_info) + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION + .def("get_host_array", get_mem_obj_host_array, + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C") +#endif + .def("__eq__", [](const cls &self, const cls &other){ return self == other; }) + .def("__ne__", [](const cls &self, const cls &other){ return self != other; }) + .def("__hash__", &cls::hash) + + .def_property_readonly("int_ptr", to_int_ptr, + "Return an integer corresponding to the pointer value " + "of the underlying :c:type:`cl_mem`. " + "Use :meth:`from_int_ptr` to turn back into a Python object." + "\n\n.. versionadded:: 2013.2\n") + ; + } + { + typedef memory_object cls; + py::class_(m, "MemoryObject", py::dynamic_attr()) + .DEF_SIMPLE_METHOD(release) + .def_property_readonly("hostbuf", &cls::hostbuf) + + .def_static("from_int_ptr", memory_object_from_int, + "(static method) Return a new Python object referencing the C-level " + ":c:type:`cl_mem` object at the location pointed to " + "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " + "will be called if *retain* is True." + "If the previous owner of the object will *not* release the reference, " + "*retain* should be set to *False*, to effectively transfer ownership to " + ":mod:`pyopencl`." + "\n\n.. versionadded:: 2013.2\n" + "\n\n.. 
versionchanged:: 2016.1\n\n *retain* added.", + py::arg("int_ptr_value"), + py::arg("retain")=true) + ; + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("enqueue_migrate_mem_objects", enqueue_migrate_mem_objects, + py::arg("queue"), + py::arg("mem_objects"), + py::arg("flags")=0, + py::arg("wait_for")=py::none() + ); +#endif + + // }}} + + // {{{ buffer + { + typedef buffer cls; + py::class_(m, "Buffer", py::dynamic_attr()) + .def( + py::init( + [](context &ctx, cl_mem_flags flags, size_t size, py::object py_hostbuf) + { return create_buffer_py(ctx, flags, size, py_hostbuf); } + ), + py::arg("context"), + py::arg("flags"), + py::arg("size")=0, + py::arg("hostbuf")=py::none() + ) +#if PYOPENCL_CL_VERSION >= 0x1010 + .def("get_sub_region", &cls::get_sub_region, + py::arg("origin"), + py::arg("size"), + py::arg("flags")=0 + ) + .def("__getitem__", &cls::getitem) +#endif + ; + } + + // }}} + + // {{{ transfers + + // {{{ byte-for-byte + m.def("_enqueue_read_buffer", enqueue_read_buffer, + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true + ); + m.def("_enqueue_write_buffer", enqueue_write_buffer, + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("device_offset")=0, + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true + ); + m.def("_enqueue_copy_buffer", enqueue_copy_buffer, + py::arg("queue"), + py::arg("src"), + py::arg("dst"), + py::arg("byte_count")=-1, + py::arg("src_offset")=0, + py::arg("dst_offset")=0, + py::arg("wait_for")=py::none() + ); + + // }}} + + // {{{ rectangular + +#if PYOPENCL_CL_VERSION >= 0x1010 + m.def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect, + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("buffer_origin"), + py::arg("host_origin"), + py::arg("region"), + py::arg("buffer_pitches")=py::none(), + py::arg("host_pitches")=py::none(), + py::arg("wait_for")=py::none(), + 
py::arg("is_blocking")=true + ); + m.def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect, + py::arg("queue"), + py::arg("mem"), + py::arg("hostbuf"), + py::arg("buffer_origin"), + py::arg("host_origin"), + py::arg("region"), + py::arg("buffer_pitches")=py::none(), + py::arg("host_pitches")=py::none(), + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true + ); + m.def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, + py::arg("queue"), + py::arg("src"), + py::arg("dst"), + py::arg("src_origin"), + py::arg("dst_origin"), + py::arg("region"), + py::arg("src_pitches")=py::none(), + py::arg("dst_pitches")=py::none(), + py::arg("wait_for")=py::none() + ); +#endif + + // }}} + + // }}} + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("_enqueue_fill_buffer", enqueue_fill_buffer, + py::arg("queue"), py::arg("mem"), py::arg("pattern"), + py::arg("offset"), py::arg("size"), + py::arg("wait_for")=py::none()); +#endif +} + +// vim: foldmethod=marker diff --git a/src/wrap_cl_part_2.cpp b/src/wrap_cl_part_2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7b4c2c9e8eea5d18fdb68cdc0549ab781b353580 --- /dev/null +++ b/src/wrap_cl_part_2.cpp @@ -0,0 +1,559 @@ +#include "wrap_cl.hpp" + + + + +namespace pyopencl { +#if PYOPENCL_CL_VERSION >= 0x1020 + py::object image_desc_dummy_getter(cl_image_desc &desc) + { + return py::none(); + } + + void image_desc_set_shape(cl_image_desc &desc, py::object py_shape) + { + COPY_PY_REGION_TRIPLE(shape); + desc.image_width = shape[0]; + desc.image_height = shape[1]; + desc.image_depth = shape[2]; + desc.image_array_size = shape[2]; + } + + void image_desc_set_pitches(cl_image_desc &desc, py::object py_pitches) + { + COPY_PY_PITCH_TUPLE(pitches); + desc.image_row_pitch = pitches[0]; + desc.image_slice_pitch = pitches[1]; + } + + void image_desc_set_buffer(cl_image_desc &desc, memory_object *mobj) + { + if (mobj) + desc.buffer = mobj->data(); + else + desc.buffer = 0; + } + +#endif +} + + + + +using namespace 
pyopencl; + + + + +void pyopencl_expose_part_2(py::module &m) +{ + // {{{ image + +#if PYOPENCL_CL_VERSION >= 0x1020 + { + typedef cl_image_desc cls; + py::class_(m, "ImageDescriptor") + .def(py::init<>()) + .def_readwrite("image_type", &cls::image_type) + .def_property("shape", &image_desc_dummy_getter, image_desc_set_shape) + .def_readwrite("array_size", &cls::image_array_size) + .def_property("pitches", &image_desc_dummy_getter, image_desc_set_pitches) + .def_readwrite("num_mip_levels", &cls::num_mip_levels) + .def_readwrite("num_samples", &cls::num_samples) + .def_property("buffer", &image_desc_dummy_getter, image_desc_set_buffer) + ; + } +#endif + + { + typedef image cls; + py::class_(m, "Image", py::dynamic_attr()) + .def( + py::init( + []( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + py::sequence shape, + py::sequence pitches, + py::object buffer) + { + return create_image(ctx, flags, fmt, shape, pitches, buffer); + }), + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("shape")=py::none(), + py::arg("pitches")=py::none(), + py::arg("hostbuf")=py::none() + ) +#if PYOPENCL_CL_VERSION >= 0x1020 + .def( + py::init( + []( + context const &ctx, + cl_mem_flags flags, + cl_image_format const &fmt, + cl_image_desc &desc, + py::object buffer) + { + return create_image_from_desc(ctx, flags, fmt, desc, buffer); + }), + py::arg("context"), + py::arg("flags"), + py::arg("format"), + py::arg("desc"), + py::arg("hostbuf")=py::none() + ) +#endif + .DEF_SIMPLE_METHOD(get_image_info) + ; + } + + { + typedef cl_image_format cls; + py::class_(m, "ImageFormat") + .def( + py::init( + [](cl_channel_order ord, cl_channel_type tp) + { + return make_image_format(ord, tp); + })) + .def_readwrite("channel_order", &cls::image_channel_order) + .def_readwrite("channel_data_type", &cls::image_channel_data_type) + .def_property_readonly("channel_count", &get_image_format_channel_count) + .def_property_readonly("dtype_size", 
&get_image_format_channel_dtype_size) + .def_property_readonly("itemsize", &get_image_format_item_size) + ; + } + + DEF_SIMPLE_FUNCTION(get_supported_image_formats); + + m.def("_enqueue_read_image", enqueue_read_image, + py::arg("queue"), + py::arg("mem"), + py::arg("origin"), + py::arg("region"), + py::arg("hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true + ); + m.def("_enqueue_write_image", enqueue_write_image, + py::arg("queue"), + py::arg("mem"), + py::arg("origin"), + py::arg("region"), + py::arg("hostbuf"), + py::arg("row_pitch")=0, + py::arg("slice_pitch")=0, + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true + ); + + m.def("_enqueue_copy_image", enqueue_copy_image, + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("src_origin"), + py::arg("dest_origin"), + py::arg("region"), + py::arg("wait_for")=py::none() + ); + m.def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer, + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("origin"), + py::arg("region"), + py::arg("offset"), + py::arg("wait_for")=py::none() + ); + m.def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image, + py::arg("queue"), + py::arg("src"), + py::arg("dest"), + py::arg("offset"), + py::arg("origin"), + py::arg("region"), + py::arg("wait_for")=py::none() + ); + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("enqueue_fill_image", enqueue_fill_image, + py::arg("queue"), + py::arg("mem"), + py::arg("color"), + py::arg("origin"), + py::arg("region"), + py::arg("wait_for")=py::none() + ); +#endif + + // }}} + + // {{{ memory_map + { + typedef memory_map cls; + py::class_(m, "MemoryMap", py::dynamic_attr()) + .def("release", &cls::release, + py::arg("queue").none(true)=nullptr, + py::arg("wait_for")=py::none() + ) + ; + } + + // FIXME: Reenable in pypy +#ifndef PYPY_VERSION + m.def("enqueue_map_buffer", enqueue_map_buffer, + py::arg("queue"), + 
py::arg("buf"), + py::arg("flags"), + py::arg("offset"), + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C", + py::arg("strides")=py::none(), + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true); + m.def("enqueue_map_image", enqueue_map_image, + py::arg("queue"), + py::arg("img"), + py::arg("flags"), + py::arg("origin"), + py::arg("region"), + py::arg("shape"), + py::arg("dtype"), + py::arg("order")="C", + py::arg("strides")=py::none(), + py::arg("wait_for")=py::none(), + py::arg("is_blocking")=true); +#endif + + // }}} + + // {{{ svm + +#if PYOPENCL_CL_VERSION >= 0x2000 + { + typedef svm_arg_wrapper cls; + py::class_(m, "SVM", py::dynamic_attr()) + .def(py::init()) + ; + } + + { + typedef svm_allocation cls; + py::class_(m, "SVMAllocation", py::dynamic_attr()) + .def(py::init, size_t, cl_uint, cl_svm_mem_flags>()) + .DEF_SIMPLE_METHOD(release) + .def("enqueue_release", &cls::enqueue_release, + ":returns: a :class:`pyopencl.Event`\n\n" + "|std-enqueue-blurb|") + .def("_ptr_as_int", &cls::ptr_as_int) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::ptr_as_int) + ; + } + + m.def("_enqueue_svm_memcpyw", enqueue_svm_memcpy, + py::arg("queue"), + py::arg("is_blocking"), + py::arg("dst"), + py::arg("src"), + py::arg("wait_for")=py::none() + ); + + m.def("_enqueue_svm_memfill", enqueue_svm_memfill, + py::arg("queue"), + py::arg("dst"), + py::arg("pattern"), + py::arg("byte_count")=py::none(), + py::arg("wait_for")=py::none() + ); + + m.def("_enqueue_svm_map", enqueue_svm_map, + py::arg("queue"), + py::arg("is_blocking"), + py::arg("flags"), + py::arg("svm"), + py::arg("wait_for")=py::none() + ); + + m.def("_enqueue_svm_unmap", enqueue_svm_unmap, + py::arg("queue"), + py::arg("svm"), + py::arg("wait_for")=py::none() + ); +#endif + +#if PYOPENCL_CL_VERSION >= 0x2010 + m.def("_enqueue_svm_migrate_mem", enqueue_svm_migratemem, + py::arg("queue"), + py::arg("svms"), + py::arg("flags")=py::none(), + 
py::arg("wait_for")=py::none() + ); +#endif + + // }}} + + // {{{ sampler + { + typedef sampler cls; + py::class_(m, "Sampler", py::dynamic_attr()) +#if PYOPENCL_CL_VERSION >= 0x2000 + .def(py::init()) +#endif + .def(py::init()) + .DEF_SIMPLE_METHOD(get_info) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_sampler) + ; + } + + // }}} + + // {{{ program + { + typedef program cls; + py::enum_(m, "program_kind") + .value("UNKNOWN", cls::KND_UNKNOWN) + .value("SOURCE", cls::KND_SOURCE) + .value("BINARY", cls::KND_BINARY) + ; + + py::class_(m, "_Program", py::dynamic_attr()) + .def( + py::init( + [](context &ctx, std::string const &src) + { + return create_program_with_source(ctx, src); + }), + py::arg("context"), + py::arg("src")) + .def( + py::init( + [](context &ctx, py::sequence devices, py::sequence binaries) + { + return create_program_with_binary(ctx, devices, binaries); + }), + py::arg("context"), + py::arg("devices"), + py::arg("binaries")) +#if (PYOPENCL_CL_VERSION >= 0x1020) && \ + ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__)) + .def_static("create_with_built_in_kernels", + create_program_with_built_in_kernels, + py::arg("context"), + py::arg("devices"), + py::arg("kernel_names"), + py::return_value_policy()) +#endif + .DEF_SIMPLE_METHOD(kind) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_build_info) + .def("_build", &cls::build, + py::arg("options")="", + py::arg("devices")=py::none()) +#if PYOPENCL_CL_VERSION >= 0x1020 + .def("compile", &cls::compile, + py::arg("options")="", + py::arg("devices")=py::none(), + py::arg("headers")=py::list()) + .def_static("link", &link_program, + py::arg("context"), + py::arg("programs"), + py::arg("options")="", + py::arg("devices")=py::none() + ) +#endif + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + .def("all_kernels", create_kernels_in_program) + 
PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_program) + ; + } + +#if PYOPENCL_CL_VERSION >= 0x1020 + m.def("unload_platform_compiler", unload_platform_compiler); +#endif + + // }}} + + // {{{ kernel + + { + typedef kernel cls; + py::class_(m, "Kernel", py::dynamic_attr()) + .def(py::init()) + .DEF_SIMPLE_METHOD(get_info) + .DEF_SIMPLE_METHOD(get_work_group_info) + .def("_set_arg_null", &cls::set_arg_null) + .def("_set_arg_buf", &cls::set_arg_buf) +#if PYOPENCL_CL_VERSION >= 0x2000 + .def("_set_arg_svm", &cls::set_arg_svm) +#endif + .DEF_SIMPLE_METHOD(set_arg) +#if PYOPENCL_CL_VERSION >= 0x1020 + .DEF_SIMPLE_METHOD(get_arg_info) +#endif + .def(py::self == py::self) + .def(py::self != py::self) + .def("__hash__", &cls::hash) + PYOPENCL_EXPOSE_TO_FROM_INT_PTR(cl_kernel) + ; + } + + { + typedef local_memory cls; + py::class_(m, "LocalMemory", py::dynamic_attr()) + .def( + py::init(), + py::arg("size")) + .def_property_readonly("size", &cls::size) + ; + } + + + m.def("enqueue_nd_range_kernel", enqueue_nd_range_kernel, + py::arg("queue"), + py::arg("kernel"), + py::arg("global_work_size"), + py::arg("local_work_size"), + py::arg("global_work_offset")=py::none(), + py::arg("wait_for")=py::none(), + py::arg("g_times_l")=false + ); + + // TODO: clEnqueueNativeKernel + // }}} + + // {{{ GL interop + DEF_SIMPLE_FUNCTION(have_gl); + +#ifdef HAVE_GL + +#ifdef __APPLE__ + DEF_SIMPLE_FUNCTION(get_apple_cgl_share_group); +#endif /* __APPLE__ */ + + { + typedef gl_buffer cls; + py::class_(m, "GLBuffer", py::dynamic_attr()) + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLuint bufobj) + { + return create_from_gl_buffer(ctx, flags, bufobj); + }), + py::arg("context"), + py::arg("flags"), + py::arg("bufobj")) + .def("get_gl_object_info", get_gl_object_info) + ; + } + + { + typedef gl_renderbuffer cls; + py::class_(m, "GLRenderBuffer", py::dynamic_attr()) + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLuint bufobj) + { + return create_from_gl_renderbuffer(ctx, flags, 
bufobj); + }), + py::arg("context"), + py::arg("flags"), + py::arg("bufobj")) + .def("get_gl_object_info", get_gl_object_info) + ; + } + + { + typedef gl_texture cls; + py::class_(m, "GLTexture", py::dynamic_attr()) + .def( + py::init( + [](context &ctx, cl_mem_flags flags, GLenum texture_target, + GLint miplevel, GLuint texture, unsigned dims) + { + return create_from_gl_texture(ctx, flags, texture_target, miplevel, texture, dims); + }), + py::arg("context"), + py::arg("flags"), + py::arg("texture_target"), + py::arg("miplevel"), + py::arg("texture"), + py::arg("dims")) + .def("get_gl_object_info", get_gl_object_info) + .DEF_SIMPLE_METHOD(get_gl_texture_info) + ; + } + + m.def("enqueue_acquire_gl_objects", enqueue_acquire_gl_objects, + py::arg("queue"), + py::arg("mem_objects"), + py::arg("wait_for")=py::none() + ); + m.def("enqueue_release_gl_objects", enqueue_release_gl_objects, + py::arg("queue"), + py::arg("mem_objects"), + py::arg("wait_for")=py::none() + ); + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + m.def("get_gl_context_info_khr", get_gl_context_info_khr, + py::arg("properties"), + py::arg("param_name"), + py::arg("platform")=py::none() + ); +#endif + +#endif + // }}} + + // {{{ CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD + + { + typedef cl_device_topology_amd cls; + py::class_(m, "DeviceTopologyAmd") + .def(py::init( + [](cl_char bus, cl_char device, cl_char function) + { + cl_device_topology_amd result; + result.pcie.bus = bus; + result.pcie.device = device; + result.pcie.function = function; + return result; + }), + py::arg("bus")=0, + py::arg("device")=0, + py::arg("function")=0) + + .def_property("type", + [](cls &t) { return t.pcie.type; }, + [](cls &t, cl_uint val) { t.pcie.type = val; }) + + .def_property("bus", + [](cls &t) { return t.pcie.bus; }, + [](cls &t, cl_char val) { t.pcie.bus = val; }) + .def_property("device", + [](cls &t) { return t.pcie.device; }, + [](cls &t, cl_char val) { t.pcie.device = val; }) + .def_property("function", 
+ [](cls &t) { return t.pcie.function; }, + [](cls &t, cl_char val) { t.pcie.function = val; }) + ; + } + + // }}} +} + + +// vim: foldmethod=marker diff --git a/src/wrap_constants.cpp b/src/wrap_constants.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7b6a97f16fbf2083534d8175b0395981461c0b70 --- /dev/null +++ b/src/wrap_constants.cpp @@ -0,0 +1,988 @@ +#include "wrap_cl.hpp" + + +using namespace pyopencl; + + +namespace +{ + // {{{ 'fake' constant scopes + class status_code { }; + class platform_info { }; + class device_type { }; + class device_info { }; + class device_fp_config { }; + class device_mem_cache_type { }; + class device_local_mem_type { }; + class device_exec_capabilities { }; + class device_svm_capabilities { }; + class command_queue_properties { }; + class context_info { }; + class gl_context_info { }; + class context_properties { }; + class command_queue_info { }; + class queue_properties { }; + class mem_flags { }; + class svm_mem_flags { }; + class channel_order { }; + class channel_type { }; + class mem_object_type { }; + class mem_info { }; + class image_info { }; + class addressing_mode { }; + class filter_mode { }; + class sampler_info { }; + class map_flags { }; + class program_info { }; + class program_build_info { }; + class program_binary_type { }; + class build_status { }; + class kernel_info { }; + class kernel_arg_info { }; + class kernel_arg_address_qualifier { }; + class kernel_arg_access_qualifier { }; + class kernel_arg_type_qualifier { }; + class kernel_work_group_info { }; + class event_info { }; + class command_type { }; + class command_execution_status { }; + class profiling_info { }; + class buffer_create_type { }; + class mem_migration_flags { }; + + class device_partition_property { }; + class device_affinity_domain { }; + + class gl_object_type { }; + class gl_texture_info { }; + + // }}} +} + + +void pyopencl_expose_constants(py::module &m) +{ + // {{{ exceptions + { +#define DECLARE_EXC(NAME, 
BASE) \ + static py::exception CL##NAME(m, #NAME, BASE); + + DECLARE_EXC(Error, NULL); + DECLARE_EXC(MemoryError, CLError.ptr()); + DECLARE_EXC(LogicError, CLError.ptr()); + DECLARE_EXC(RuntimeError, CLError.ptr()); + + py::register_exception_translator( + [](std::exception_ptr p) + { + try + { + if (p) std::rethrow_exception(p); + } + catch (pyopencl::error &err) + { + py::object err_obj = py::cast(err); + if (err.code() == CL_MEM_OBJECT_ALLOCATION_FAILURE) + PyErr_SetObject(CLMemoryError.ptr(), err_obj.ptr()); + else if (err.code() <= CL_INVALID_VALUE) + PyErr_SetObject(CLLogicError.ptr(), err_obj.ptr()); + else if (err.code() > CL_INVALID_VALUE && err.code() < CL_SUCCESS) + PyErr_SetObject(CLRuntimeError.ptr(), err_obj.ptr()); + else + PyErr_SetObject(CLError.ptr(), err_obj.ptr()); + } + }); + } + // }}} + + // {{{ error record + + { + typedef error cls; + py::class_ (m, "_ErrorRecord") + .def(py::init(), + py::arg("routine"), + py::arg("code"), + py::arg("msg")) + .DEF_SIMPLE_METHOD(routine) + .DEF_SIMPLE_METHOD(code) + .DEF_SIMPLE_METHOD(what) + .DEF_SIMPLE_METHOD(is_out_of_memory) + ; + } + + // }}} + + // {{{ constants +#define ADD_ATTR(PREFIX, NAME) \ + cls.attr(#NAME) = CL_##PREFIX##NAME +#define ADD_ATTR_SUFFIX(PREFIX, NAME, SUFFIX) \ + cls.attr(#NAME) = CL_##PREFIX##NAME##SUFFIX + + { + py::class_ cls(m, "status_code"); + + ADD_ATTR(, SUCCESS); + ADD_ATTR(, DEVICE_NOT_FOUND); + ADD_ATTR(, DEVICE_NOT_AVAILABLE); +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + ADD_ATTR(, COMPILER_NOT_AVAILABLE); +#endif + ADD_ATTR(, MEM_OBJECT_ALLOCATION_FAILURE); + ADD_ATTR(, OUT_OF_RESOURCES); + ADD_ATTR(, OUT_OF_HOST_MEMORY); + ADD_ATTR(, PROFILING_INFO_NOT_AVAILABLE); + ADD_ATTR(, MEM_COPY_OVERLAP); + ADD_ATTR(, IMAGE_FORMAT_MISMATCH); + ADD_ATTR(, IMAGE_FORMAT_NOT_SUPPORTED); + ADD_ATTR(, BUILD_PROGRAM_FAILURE); + ADD_ATTR(, MAP_FAILURE); + + ADD_ATTR(, INVALID_VALUE); + ADD_ATTR(, INVALID_DEVICE_TYPE); + ADD_ATTR(, INVALID_PLATFORM); + 
ADD_ATTR(, INVALID_DEVICE); + ADD_ATTR(, INVALID_CONTEXT); + ADD_ATTR(, INVALID_QUEUE_PROPERTIES); + ADD_ATTR(, INVALID_COMMAND_QUEUE); + ADD_ATTR(, INVALID_HOST_PTR); + ADD_ATTR(, INVALID_MEM_OBJECT); + ADD_ATTR(, INVALID_IMAGE_FORMAT_DESCRIPTOR); + ADD_ATTR(, INVALID_IMAGE_SIZE); + ADD_ATTR(, INVALID_SAMPLER); + ADD_ATTR(, INVALID_BINARY); + ADD_ATTR(, INVALID_BUILD_OPTIONS); + ADD_ATTR(, INVALID_PROGRAM); + ADD_ATTR(, INVALID_PROGRAM_EXECUTABLE); + ADD_ATTR(, INVALID_KERNEL_NAME); + ADD_ATTR(, INVALID_KERNEL_DEFINITION); + ADD_ATTR(, INVALID_KERNEL); + ADD_ATTR(, INVALID_ARG_INDEX); + ADD_ATTR(, INVALID_ARG_VALUE); + ADD_ATTR(, INVALID_ARG_SIZE); + ADD_ATTR(, INVALID_KERNEL_ARGS); + ADD_ATTR(, INVALID_WORK_DIMENSION); + ADD_ATTR(, INVALID_WORK_GROUP_SIZE); + ADD_ATTR(, INVALID_WORK_ITEM_SIZE); + ADD_ATTR(, INVALID_GLOBAL_OFFSET); + ADD_ATTR(, INVALID_EVENT_WAIT_LIST); + ADD_ATTR(, INVALID_EVENT); + ADD_ATTR(, INVALID_OPERATION); + ADD_ATTR(, INVALID_GL_OBJECT); + ADD_ATTR(, INVALID_BUFFER_SIZE); + ADD_ATTR(, INVALID_MIP_LEVEL); + +#if defined(cl_khr_icd) && (cl_khr_icd >= 1) + ADD_ATTR(, PLATFORM_NOT_FOUND_KHR); +#endif + +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR(, INVALID_GL_SHAREGROUP_REFERENCE_KHR); +#endif + +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(, MISALIGNED_SUB_BUFFER_OFFSET); + ADD_ATTR(, EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST); + ADD_ATTR(, INVALID_GLOBAL_WORK_SIZE); +#endif + +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(, COMPILE_PROGRAM_FAILURE); + ADD_ATTR(, LINKER_NOT_AVAILABLE); + ADD_ATTR(, LINK_PROGRAM_FAILURE); + ADD_ATTR(, DEVICE_PARTITION_FAILED); + ADD_ATTR(, KERNEL_ARG_INFO_NOT_AVAILABLE); + ADD_ATTR(, INVALID_IMAGE_DESCRIPTOR); + ADD_ATTR(, INVALID_COMPILER_OPTIONS); + ADD_ATTR(, INVALID_LINKER_OPTIONS); + ADD_ATTR(, INVALID_DEVICE_PARTITION_COUNT); +#endif + +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(, INVALID_PIPE_SIZE); + ADD_ATTR(, INVALID_DEVICE_QUEUE); +#endif + +#if 
defined(cl_ext_device_fission) && defined(PYOPENCL_USE_DEVICE_FISSION) + ADD_ATTR(, DEVICE_PARTITION_FAILED_EXT); + ADD_ATTR(, INVALID_PARTITION_COUNT_EXT); + ADD_ATTR(, INVALID_PARTITION_NAME_EXT); +#endif + } + + { + py::class_ cls(m, "platform_info"); + ADD_ATTR(PLATFORM_, PROFILE); + ADD_ATTR(PLATFORM_, VERSION); + ADD_ATTR(PLATFORM_, NAME); + ADD_ATTR(PLATFORM_, VENDOR); +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) + ADD_ATTR(PLATFORM_, EXTENSIONS); +#endif + } + + { + py::class_ cls(m, "device_type"); + ADD_ATTR(DEVICE_TYPE_, DEFAULT); + ADD_ATTR(DEVICE_TYPE_, CPU); + ADD_ATTR(DEVICE_TYPE_, GPU); + ADD_ATTR(DEVICE_TYPE_, ACCELERATOR); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_TYPE_, CUSTOM); +#endif + ADD_ATTR(DEVICE_TYPE_, ALL); + } + + { + py::class_ cls(m, "device_info"); + ADD_ATTR(DEVICE_, TYPE); + ADD_ATTR(DEVICE_, VENDOR_ID); + ADD_ATTR(DEVICE_, MAX_COMPUTE_UNITS); + ADD_ATTR(DEVICE_, MAX_WORK_ITEM_DIMENSIONS); + ADD_ATTR(DEVICE_, MAX_WORK_GROUP_SIZE); + ADD_ATTR(DEVICE_, MAX_WORK_ITEM_SIZES); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_CHAR); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_SHORT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_INT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_LONG); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_FLOAT); + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_DOUBLE); + ADD_ATTR(DEVICE_, MAX_CLOCK_FREQUENCY); + ADD_ATTR(DEVICE_, ADDRESS_BITS); + ADD_ATTR(DEVICE_, MAX_READ_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_WRITE_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_MEM_ALLOC_SIZE); + ADD_ATTR(DEVICE_, IMAGE2D_MAX_WIDTH); + ADD_ATTR(DEVICE_, IMAGE2D_MAX_HEIGHT); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_WIDTH); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_HEIGHT); + ADD_ATTR(DEVICE_, IMAGE3D_MAX_DEPTH); + ADD_ATTR(DEVICE_, IMAGE_SUPPORT); + ADD_ATTR(DEVICE_, MAX_PARAMETER_SIZE); + ADD_ATTR(DEVICE_, MAX_SAMPLERS); + ADD_ATTR(DEVICE_, MEM_BASE_ADDR_ALIGN); + ADD_ATTR(DEVICE_, MIN_DATA_TYPE_ALIGN_SIZE); + ADD_ATTR(DEVICE_, 
SINGLE_FP_CONFIG); +#ifdef CL_DEVICE_DOUBLE_FP_CONFIG + ADD_ATTR(DEVICE_, DOUBLE_FP_CONFIG); +#endif +#ifdef CL_DEVICE_HALF_FP_CONFIG + ADD_ATTR(DEVICE_, HALF_FP_CONFIG); +#endif + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHE_TYPE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHELINE_SIZE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_CACHE_SIZE); + ADD_ATTR(DEVICE_, GLOBAL_MEM_SIZE); + ADD_ATTR(DEVICE_, MAX_CONSTANT_BUFFER_SIZE); + ADD_ATTR(DEVICE_, MAX_CONSTANT_ARGS); + ADD_ATTR(DEVICE_, LOCAL_MEM_TYPE); + ADD_ATTR(DEVICE_, LOCAL_MEM_SIZE); + ADD_ATTR(DEVICE_, ERROR_CORRECTION_SUPPORT); + ADD_ATTR(DEVICE_, PROFILING_TIMER_RESOLUTION); + ADD_ATTR(DEVICE_, ENDIAN_LITTLE); + ADD_ATTR(DEVICE_, AVAILABLE); + ADD_ATTR(DEVICE_, COMPILER_AVAILABLE); + ADD_ATTR(DEVICE_, EXECUTION_CAPABILITIES); + ADD_ATTR(DEVICE_, QUEUE_PROPERTIES); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(DEVICE_, QUEUE_ON_HOST_PROPERTIES); +#endif + ADD_ATTR(DEVICE_, NAME); + ADD_ATTR(DEVICE_, VENDOR); + ADD_ATTR(, DRIVER_VERSION); + ADD_ATTR(DEVICE_, VERSION); + ADD_ATTR(DEVICE_, PROFILE); + ADD_ATTR(DEVICE_, VERSION); + ADD_ATTR(DEVICE_, EXTENSIONS); + ADD_ATTR(DEVICE_, PLATFORM); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(DEVICE_, PREFERRED_VECTOR_WIDTH_HALF); + ADD_ATTR(DEVICE_, HOST_UNIFIED_MEMORY); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_CHAR); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_SHORT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_INT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_LONG); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_FLOAT); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_DOUBLE); + ADD_ATTR(DEVICE_, NATIVE_VECTOR_WIDTH_HALF); + ADD_ATTR(DEVICE_, OPENCL_C_VERSION); +#endif +// support for cl_nv_device_attribute_query +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + ADD_ATTR(DEVICE_, COMPUTE_CAPABILITY_MAJOR_NV); + ADD_ATTR(DEVICE_, COMPUTE_CAPABILITY_MINOR_NV); + ADD_ATTR(DEVICE_, REGISTERS_PER_BLOCK_NV); + ADD_ATTR(DEVICE_, WARP_SIZE_NV); + ADD_ATTR(DEVICE_, GPU_OVERLAP_NV); + ADD_ATTR(DEVICE_, KERNEL_EXEC_TIMEOUT_NV); + 
ADD_ATTR(DEVICE_, INTEGRATED_MEMORY_NV); + // Nvidia specific device attributes, not defined in Khronos CL/cl_ext.h +#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV + ADD_ATTR(DEVICE_, ATTRIBUTE_ASYNC_ENGINE_COUNT_NV); +#endif +#ifdef CL_DEVICE_PCI_BUS_ID_NV + ADD_ATTR(DEVICE_, PCI_BUS_ID_NV); +#endif +#ifdef CL_DEVICE_PCI_SLOT_ID_NV + ADD_ATTR(DEVICE_, PCI_SLOT_ID_NV); +#endif +#endif +// {{{ cl_amd_device_attribute_query +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD + ADD_ATTR(DEVICE_, PROFILING_TIMER_OFFSET_AMD); +#endif +#ifdef CL_DEVICE_TOPOLOGY_AMD + ADD_ATTR(DEVICE_, TOPOLOGY_AMD); +#endif +#ifdef CL_DEVICE_BOARD_NAME_AMD + ADD_ATTR(DEVICE_, BOARD_NAME_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD + ADD_ATTR(DEVICE_, GLOBAL_FREE_MEMORY_AMD); +#endif +#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD + ADD_ATTR(DEVICE_, SIMD_PER_COMPUTE_UNIT_AMD); +#endif +#ifdef CL_DEVICE_SIMD_WIDTH_AMD + ADD_ATTR(DEVICE_, SIMD_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD + ADD_ATTR(DEVICE_, SIMD_INSTRUCTION_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD + ADD_ATTR(DEVICE_, WAVEFRONT_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD + ADD_ATTR(DEVICE_, GLOBAL_MEM_CHANNELS_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD + ADD_ATTR(DEVICE_, GLOBAL_MEM_CHANNEL_BANKS_AMD); +#endif +#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD + ADD_ATTR(DEVICE_, GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD + ADD_ATTR(DEVICE_, LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD); +#endif +#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD + ADD_ATTR(DEVICE_, LOCAL_MEM_BANKS_AMD); +#endif +// }}} +#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD + ADD_ATTR(DEVICE_, THREAD_TRACE_SUPPORTED_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MAJOR_AMD + ADD_ATTR(DEVICE_, GFXIP_MAJOR_AMD); +#endif +#ifdef CL_DEVICE_GFXIP_MINOR_AMD + ADD_ATTR(DEVICE_, GFXIP_MINOR_AMD); +#endif +#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD + 
ADD_ATTR(DEVICE_, AVAILABLE_ASYNC_QUEUES_AMD); +#endif + +#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT + ADD_ATTR(DEVICE_, MAX_ATOMIC_COUNTERS_EXT); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_, LINKER_AVAILABLE); + ADD_ATTR(DEVICE_, BUILT_IN_KERNELS); + ADD_ATTR(DEVICE_, IMAGE_MAX_BUFFER_SIZE); + ADD_ATTR(DEVICE_, IMAGE_MAX_ARRAY_SIZE); + ADD_ATTR(DEVICE_, PARENT_DEVICE); + ADD_ATTR(DEVICE_, PARTITION_MAX_SUB_DEVICES); + ADD_ATTR(DEVICE_, PARTITION_PROPERTIES); + ADD_ATTR(DEVICE_, PARTITION_AFFINITY_DOMAIN); + ADD_ATTR(DEVICE_, PARTITION_TYPE); + ADD_ATTR(DEVICE_, REFERENCE_COUNT); + ADD_ATTR(DEVICE_, PREFERRED_INTEROP_USER_SYNC); + ADD_ATTR(DEVICE_, PRINTF_BUFFER_SIZE); +#endif +#ifdef cl_khr_image2d_from_buffer + ADD_ATTR(DEVICE_, IMAGE_PITCH_ALIGNMENT); + ADD_ATTR(DEVICE_, IMAGE_BASE_ADDRESS_ALIGNMENT); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(DEVICE_, MAX_READ_WRITE_IMAGE_ARGS); + ADD_ATTR(DEVICE_, MAX_GLOBAL_VARIABLE_SIZE); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_PROPERTIES); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_PREFERRED_SIZE); + ADD_ATTR(DEVICE_, QUEUE_ON_DEVICE_MAX_SIZE); + ADD_ATTR(DEVICE_, MAX_ON_DEVICE_QUEUES); + ADD_ATTR(DEVICE_, MAX_ON_DEVICE_EVENTS); + ADD_ATTR(DEVICE_, SVM_CAPABILITIES); + ADD_ATTR(DEVICE_, GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE); + ADD_ATTR(DEVICE_, MAX_PIPE_ARGS); + ADD_ATTR(DEVICE_, PIPE_MAX_ACTIVE_RESERVATIONS); + ADD_ATTR(DEVICE_, PIPE_MAX_PACKET_SIZE); + ADD_ATTR(DEVICE_, PREFERRED_PLATFORM_ATOMIC_ALIGNMENT); + ADD_ATTR(DEVICE_, PREFERRED_GLOBAL_ATOMIC_ALIGNMENT); + ADD_ATTR(DEVICE_, PREFERRED_LOCAL_ATOMIC_ALIGNMENT); +#endif +#if PYOPENCL_CL_VERSION >= 0x2010 + ADD_ATTR(DEVICE_, IL_VERSION); + ADD_ATTR(DEVICE_, MAX_NUM_SUB_GROUPS); + ADD_ATTR(DEVICE_, SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS); +#endif + /* cl_intel_advanced_motion_estimation */ +#ifdef CL_DEVICE_ME_VERSION_INTEL + ADD_ATTR(DEVICE_, ME_VERSION_INTEL); +#endif + + /* cl_qcom_ext_host_ptr */ +#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM + 
ADD_ATTR(DEVICE_, EXT_MEM_PADDING_IN_BYTES_QCOM); +#endif +#ifdef CL_DEVICE_PAGE_SIZE_QCOM + ADD_ATTR(DEVICE_, PAGE_SIZE_QCOM); +#endif + + /* cl_khr_spir */ +#ifdef CL_DEVICE_SPIR_VERSIONS + ADD_ATTR(DEVICE_, SPIR_VERSIONS); +#endif + + /* cl_altera_device_temperature */ +#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA + ADD_ATTR(DEVICE_, CORE_TEMPERATURE_ALTERA); +#endif + + /* cl_intel_simultaneous_sharing */ +#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL + ADD_ATTR(DEVICE_, SIMULTANEOUS_INTEROPS_INTEL); +#endif +#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL + ADD_ATTR(DEVICE_, NUM_SIMULTANEOUS_INTEROPS_INTEL); +#endif + } + + { + py::class_ cls(m, "device_fp_config"); + ADD_ATTR(FP_, DENORM); + ADD_ATTR(FP_, INF_NAN); + ADD_ATTR(FP_, ROUND_TO_NEAREST); + ADD_ATTR(FP_, ROUND_TO_ZERO); + ADD_ATTR(FP_, ROUND_TO_INF); + ADD_ATTR(FP_, FMA); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(FP_, SOFT_FLOAT); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(FP_, CORRECTLY_ROUNDED_DIVIDE_SQRT); +#endif + } + + { + py::class_ cls(m, "device_mem_cache_type"); + ADD_ATTR( , NONE); + ADD_ATTR( , READ_ONLY_CACHE); + ADD_ATTR( , READ_WRITE_CACHE); + } + + { + py::class_ cls(m, "device_local_mem_type"); + ADD_ATTR( , LOCAL); + ADD_ATTR( , GLOBAL); + } + + { + py::class_ cls(m, "device_exec_capabilities"); + ADD_ATTR(EXEC_, KERNEL); + ADD_ATTR(EXEC_, NATIVE_KERNEL); +#ifdef CL_EXEC_IMMEDIATE_EXECUTION_INTEL + ADD_ATTR(EXEC_, IMMEDIATE_EXECUTION_INTEL); +#endif + } + + { + py::class_ cls(m, "device_svm_capabilities"); +#if PYOPENCL_CL_VERSION >= 0x2000 + // device_svm_capabilities + ADD_ATTR(DEVICE_SVM_, COARSE_GRAIN_BUFFER); + ADD_ATTR(DEVICE_SVM_, FINE_GRAIN_BUFFER); + ADD_ATTR(DEVICE_SVM_, FINE_GRAIN_SYSTEM); + ADD_ATTR(DEVICE_SVM_, ATOMICS); +#endif + } + + { + py::class_ cls(m, "command_queue_properties"); + ADD_ATTR(QUEUE_, OUT_OF_ORDER_EXEC_MODE_ENABLE); + ADD_ATTR(QUEUE_, PROFILING_ENABLE); +#ifdef CL_QUEUE_IMMEDIATE_EXECUTION_ENABLE_INTEL + ADD_ATTR(QUEUE_, 
IMMEDIATE_EXECUTION_ENABLE_INTEL); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(QUEUE_, ON_DEVICE); + ADD_ATTR(QUEUE_, ON_DEVICE_DEFAULT); +#endif + } + + { + py::class_ cls(m, "context_info"); + ADD_ATTR(CONTEXT_, REFERENCE_COUNT); + ADD_ATTR(CONTEXT_, DEVICES); + ADD_ATTR(CONTEXT_, PROPERTIES); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(CONTEXT_, NUM_DEVICES); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(CONTEXT_, INTEROP_USER_SYNC); +#endif + } + + { + py::class_ cls(m, "gl_context_info"); +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR(, CURRENT_DEVICE_FOR_GL_CONTEXT_KHR); + ADD_ATTR(, DEVICES_FOR_GL_CONTEXT_KHR); +#endif + } + + { + py::class_ cls(m, "context_properties"); + ADD_ATTR(CONTEXT_, PLATFORM); +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + ADD_ATTR( ,GL_CONTEXT_KHR); + ADD_ATTR( ,EGL_DISPLAY_KHR); + ADD_ATTR( ,GLX_DISPLAY_KHR); + ADD_ATTR( ,WGL_HDC_KHR); + ADD_ATTR( ,CGL_SHAREGROUP_KHR); +#endif +#if defined(__APPLE__) && defined(HAVE_GL) + ADD_ATTR( ,CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE); +#endif /* __APPLE__ */ +// cl_amd_offline_devices +#ifdef CL_CONTEXT_OFFLINE_DEVICES_AMD + ADD_ATTR(CONTEXT_, OFFLINE_DEVICES_AMD); +#endif + } + + { + py::class_ cls(m, "command_queue_info"); + ADD_ATTR(QUEUE_, CONTEXT); + ADD_ATTR(QUEUE_, DEVICE); + ADD_ATTR(QUEUE_, REFERENCE_COUNT); + ADD_ATTR(QUEUE_, PROPERTIES); + } + + { + py::class_ cls(m, "queue_properties"); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(QUEUE_, PROPERTIES); + ADD_ATTR(QUEUE_, SIZE); +#endif + } + + { + py::class_ cls(m, "mem_flags"); + ADD_ATTR(MEM_, READ_WRITE); + ADD_ATTR(MEM_, WRITE_ONLY); + ADD_ATTR(MEM_, READ_ONLY); + ADD_ATTR(MEM_, USE_HOST_PTR); + ADD_ATTR(MEM_, ALLOC_HOST_PTR); + ADD_ATTR(MEM_, COPY_HOST_PTR); +#ifdef cl_amd_device_memory_flags + ADD_ATTR(MEM_, USE_PERSISTENT_MEM_AMD); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MEM_, HOST_WRITE_ONLY); + ADD_ATTR(MEM_, HOST_READ_ONLY); + ADD_ATTR(MEM_, 
HOST_NO_ACCESS); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_, KERNEL_READ_AND_WRITE); +#endif + } + + { + py::class_ cls(m, "svm_mem_flags"); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_, READ_WRITE); + ADD_ATTR(MEM_, WRITE_ONLY); + ADD_ATTR(MEM_, READ_ONLY); + ADD_ATTR(MEM_, SVM_FINE_GRAIN_BUFFER); + ADD_ATTR(MEM_, SVM_ATOMICS); +#endif + } + + { + py::class_ cls(m, "channel_order"); + ADD_ATTR( , R); + ADD_ATTR( , A); + ADD_ATTR( , RG); + ADD_ATTR( , RA); + ADD_ATTR( , RGB); + ADD_ATTR( , RGBA); + ADD_ATTR( , BGRA); + ADD_ATTR( , INTENSITY); + ADD_ATTR( , LUMINANCE); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR( , Rx); + ADD_ATTR( , RGx); + ADD_ATTR( , RGBx); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR( , sRGB); + ADD_ATTR( , sRGBx); + ADD_ATTR( , sRGBA); + ADD_ATTR( , sBGRA); + ADD_ATTR( , ABGR); +#endif + } + + { + py::class_ cls(m, "channel_type"); + ADD_ATTR( , SNORM_INT8); + ADD_ATTR( , SNORM_INT16); + ADD_ATTR( , UNORM_INT8); + ADD_ATTR( , UNORM_INT16); + ADD_ATTR( , UNORM_SHORT_565); + ADD_ATTR( , UNORM_SHORT_555); + ADD_ATTR( , UNORM_INT_101010); + ADD_ATTR( , SIGNED_INT8); + ADD_ATTR( , SIGNED_INT16); + ADD_ATTR( , SIGNED_INT32); + ADD_ATTR( , UNSIGNED_INT8); + ADD_ATTR( , UNSIGNED_INT16); + ADD_ATTR( , UNSIGNED_INT32); + ADD_ATTR( , HALF_FLOAT); + ADD_ATTR( , FLOAT); + } + + { + py::class_ cls(m, "mem_object_type"); + ADD_ATTR(MEM_OBJECT_, BUFFER); + ADD_ATTR(MEM_OBJECT_, IMAGE2D); + ADD_ATTR(MEM_OBJECT_, IMAGE3D); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MEM_OBJECT_, IMAGE2D_ARRAY); + ADD_ATTR(MEM_OBJECT_, IMAGE1D); + ADD_ATTR(MEM_OBJECT_, IMAGE1D_ARRAY); + ADD_ATTR(MEM_OBJECT_, IMAGE1D_BUFFER); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_OBJECT_, PIPE); +#endif + } + + { + py::class_ cls(m, "mem_info"); + ADD_ATTR(MEM_, TYPE); + ADD_ATTR(MEM_, FLAGS); + ADD_ATTR(MEM_, SIZE); + ADD_ATTR(MEM_, HOST_PTR); + ADD_ATTR(MEM_, MAP_COUNT); + ADD_ATTR(MEM_, REFERENCE_COUNT); + ADD_ATTR(MEM_, CONTEXT); +#if 
PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(MEM_, ASSOCIATED_MEMOBJECT); + ADD_ATTR(MEM_, OFFSET); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(MEM_, USES_SVM_POINTER); +#endif + } + + { + py::class_ cls(m, "image_info"); + ADD_ATTR(IMAGE_, FORMAT); + ADD_ATTR(IMAGE_, ELEMENT_SIZE); + ADD_ATTR(IMAGE_, ROW_PITCH); + ADD_ATTR(IMAGE_, SLICE_PITCH); + ADD_ATTR(IMAGE_, WIDTH); + ADD_ATTR(IMAGE_, HEIGHT); + ADD_ATTR(IMAGE_, DEPTH); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(IMAGE_, ARRAY_SIZE); + ADD_ATTR(IMAGE_, BUFFER); + ADD_ATTR(IMAGE_, NUM_MIP_LEVELS); + ADD_ATTR(IMAGE_, NUM_SAMPLES); +#endif + } + + { + py::class_ cls(m, "addressing_mode"); + ADD_ATTR(ADDRESS_, NONE); + ADD_ATTR(ADDRESS_, CLAMP_TO_EDGE); + ADD_ATTR(ADDRESS_, CLAMP); + ADD_ATTR(ADDRESS_, REPEAT); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(ADDRESS_, MIRRORED_REPEAT); +#endif + } + + { + py::class_ cls(m, "filter_mode"); + ADD_ATTR(FILTER_, NEAREST); + ADD_ATTR(FILTER_, LINEAR); + } + + { + py::class_ cls(m, "sampler_info"); + ADD_ATTR(SAMPLER_, REFERENCE_COUNT); + ADD_ATTR(SAMPLER_, CONTEXT); + ADD_ATTR(SAMPLER_, NORMALIZED_COORDS); + ADD_ATTR(SAMPLER_, ADDRESSING_MODE); + ADD_ATTR(SAMPLER_, FILTER_MODE); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(SAMPLER_, MIP_FILTER_MODE); + ADD_ATTR(SAMPLER_, LOD_MIN); + ADD_ATTR(SAMPLER_, LOD_MAX); +#endif + } + + { + py::class_ cls(m, "map_flags"); + ADD_ATTR(MAP_, READ); + ADD_ATTR(MAP_, WRITE); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MAP_, WRITE_INVALIDATE_REGION); +#endif + } + + { + py::class_ cls(m, "program_info"); + ADD_ATTR(PROGRAM_, REFERENCE_COUNT); + ADD_ATTR(PROGRAM_, CONTEXT); + ADD_ATTR(PROGRAM_, NUM_DEVICES); + ADD_ATTR(PROGRAM_, DEVICES); + ADD_ATTR(PROGRAM_, SOURCE); + ADD_ATTR(PROGRAM_, BINARY_SIZES); + ADD_ATTR(PROGRAM_, BINARIES); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_, NUM_KERNELS); + ADD_ATTR(PROGRAM_, KERNEL_NAMES); +#endif + } + + { + py::class_ cls(m, "program_build_info"); + ADD_ATTR(PROGRAM_BUILD_, 
STATUS); + ADD_ATTR(PROGRAM_BUILD_, OPTIONS); + ADD_ATTR(PROGRAM_BUILD_, LOG); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_, BINARY_TYPE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(PROGRAM_BUILD_, GLOBAL_VARIABLE_TOTAL_SIZE); +#endif + } + + { + py::class_ cls(m, "program_binary_type"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(PROGRAM_BINARY_TYPE_, NONE); + ADD_ATTR(PROGRAM_BINARY_TYPE_, COMPILED_OBJECT); + ADD_ATTR(PROGRAM_BINARY_TYPE_, LIBRARY); + ADD_ATTR(PROGRAM_BINARY_TYPE_, EXECUTABLE); +#endif + } + + { + py::class_ cls(m, "kernel_info"); + ADD_ATTR(KERNEL_, FUNCTION_NAME); + ADD_ATTR(KERNEL_, NUM_ARGS); + ADD_ATTR(KERNEL_, REFERENCE_COUNT); + ADD_ATTR(KERNEL_, CONTEXT); + ADD_ATTR(KERNEL_, PROGRAM); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_, ATTRIBUTES); +#endif + } + + { + py::class_ cls(m, "kernel_arg_info"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_, ADDRESS_QUALIFIER); + ADD_ATTR(KERNEL_ARG_, ACCESS_QUALIFIER); + ADD_ATTR(KERNEL_ARG_, TYPE_NAME); + ADD_ATTR(KERNEL_ARG_, TYPE_QUALIFIER); + ADD_ATTR(KERNEL_ARG_, NAME); +#endif + } + + { + py::class_ cls( + m, "kernel_arg_address_qualifier"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_ADDRESS_, GLOBAL); + ADD_ATTR(KERNEL_ARG_ADDRESS_, LOCAL); + ADD_ATTR(KERNEL_ARG_ADDRESS_, CONSTANT); + ADD_ATTR(KERNEL_ARG_ADDRESS_, PRIVATE); +#endif + } + + { + py::class_ cls( + m, "kernel_arg_access_qualifier"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_ACCESS_, READ_ONLY); + ADD_ATTR(KERNEL_ARG_ACCESS_, WRITE_ONLY); + ADD_ATTR(KERNEL_ARG_ACCESS_, READ_WRITE); + ADD_ATTR(KERNEL_ARG_ACCESS_, NONE); +#endif + } + + { + py::class_ cls( + m, "kernel_arg_type_qualifier"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_ARG_TYPE_, NONE); + ADD_ATTR(KERNEL_ARG_TYPE_, CONST); + ADD_ATTR(KERNEL_ARG_TYPE_, RESTRICT); + ADD_ATTR(KERNEL_ARG_TYPE_, VOLATILE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(KERNEL_ARG_TYPE_, PIPE); +#endif + 
} + + { + py::class_ cls(m, "kernel_work_group_info"); + ADD_ATTR(KERNEL_, WORK_GROUP_SIZE); + ADD_ATTR(KERNEL_, COMPILE_WORK_GROUP_SIZE); + ADD_ATTR(KERNEL_, LOCAL_MEM_SIZE); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(KERNEL_, PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + ADD_ATTR(KERNEL_, PRIVATE_MEM_SIZE); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(KERNEL_, GLOBAL_WORK_SIZE); +#endif + } + + { + py::class_ cls(m, "event_info"); + ADD_ATTR(EVENT_, COMMAND_QUEUE); + ADD_ATTR(EVENT_, COMMAND_TYPE); + ADD_ATTR(EVENT_, REFERENCE_COUNT); + ADD_ATTR(EVENT_, COMMAND_EXECUTION_STATUS); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(EVENT_, CONTEXT); +#endif + } + + { + py::class_ cls(m, "command_type"); + ADD_ATTR(COMMAND_, NDRANGE_KERNEL); + ADD_ATTR(COMMAND_, TASK); + ADD_ATTR(COMMAND_, NATIVE_KERNEL); + ADD_ATTR(COMMAND_, READ_BUFFER); + ADD_ATTR(COMMAND_, WRITE_BUFFER); + ADD_ATTR(COMMAND_, COPY_BUFFER); + ADD_ATTR(COMMAND_, READ_IMAGE); + ADD_ATTR(COMMAND_, WRITE_IMAGE); + ADD_ATTR(COMMAND_, COPY_IMAGE); + ADD_ATTR(COMMAND_, COPY_IMAGE_TO_BUFFER); + ADD_ATTR(COMMAND_, COPY_BUFFER_TO_IMAGE); + ADD_ATTR(COMMAND_, MAP_BUFFER); + ADD_ATTR(COMMAND_, MAP_IMAGE); + ADD_ATTR(COMMAND_, UNMAP_MEM_OBJECT); + ADD_ATTR(COMMAND_, MARKER); + ADD_ATTR(COMMAND_, ACQUIRE_GL_OBJECTS); + ADD_ATTR(COMMAND_, RELEASE_GL_OBJECTS); +#if PYOPENCL_CL_VERSION >= 0x1010 + ADD_ATTR(COMMAND_, READ_BUFFER_RECT); + ADD_ATTR(COMMAND_, WRITE_BUFFER_RECT); + ADD_ATTR(COMMAND_, COPY_BUFFER_RECT); + ADD_ATTR(COMMAND_, USER); +#endif +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(COMMAND_, BARRIER); + ADD_ATTR(COMMAND_, MIGRATE_MEM_OBJECTS); + ADD_ATTR(COMMAND_, FILL_BUFFER); + ADD_ATTR(COMMAND_, FILL_IMAGE); +#endif +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(COMMAND_, SVM_FREE); + ADD_ATTR(COMMAND_, SVM_MEMCPY); + ADD_ATTR(COMMAND_, SVM_MEMFILL); + ADD_ATTR(COMMAND_, SVM_MAP); + ADD_ATTR(COMMAND_, SVM_UNMAP); +#endif + } + + { + py::class_ cls(m, "command_execution_status"); + ADD_ATTR(, 
COMPLETE); + ADD_ATTR(, RUNNING); + ADD_ATTR(, SUBMITTED); + ADD_ATTR(, QUEUED); + } + + { + py::class_ cls(m, "profiling_info"); + ADD_ATTR(PROFILING_COMMAND_, QUEUED); + ADD_ATTR(PROFILING_COMMAND_, SUBMIT); + ADD_ATTR(PROFILING_COMMAND_, START); + ADD_ATTR(PROFILING_COMMAND_, END); +#if PYOPENCL_CL_VERSION >= 0x2000 + ADD_ATTR(PROFILING_COMMAND_, COMPLETE); +#endif + } + +/* not needed--filled in automatically by implementation. +#if PYOPENCL_CL_VERSION >= 0x1010 + { + py::class_ cls(m, "buffer_create_type"); + ADD_ATTR(BUFFER_CREATE_TYPE_, REGION); + } +#endif +*/ + + { + py::class_ cls( + m, "mem_migration_flags"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(MIGRATE_MEM_OBJECT_, HOST); + ADD_ATTR(MIGRATE_MEM_OBJECT_, CONTENT_UNDEFINED); +#endif + } + + { + py::class_ cls( + m, "device_partition_property"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_PARTITION_, EQUALLY); + ADD_ATTR(DEVICE_PARTITION_, BY_COUNTS); + ADD_ATTR(DEVICE_PARTITION_, BY_COUNTS_LIST_END); + ADD_ATTR(DEVICE_PARTITION_, BY_AFFINITY_DOMAIN); +#endif + } + + { + py::class_ cls(m, "device_affinity_domain"); +#if PYOPENCL_CL_VERSION >= 0x1020 + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, NUMA); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L4_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L3_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L2_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, L1_CACHE); + ADD_ATTR(DEVICE_AFFINITY_DOMAIN_, NEXT_PARTITIONABLE); +#endif + } + +#ifdef HAVE_GL + { + py::class_ cls(m, "gl_object_type"); + ADD_ATTR(GL_OBJECT_, BUFFER); + ADD_ATTR(GL_OBJECT_, TEXTURE2D); + ADD_ATTR(GL_OBJECT_, TEXTURE3D); + ADD_ATTR(GL_OBJECT_, RENDERBUFFER); + } + + { + py::class_ cls(m, "gl_texture_info"); + ADD_ATTR(GL_, TEXTURE_TARGET); + ADD_ATTR(GL_, MIPMAP_LEVEL); + } +#endif + + // }}} +} + + + + +// vim: foldmethod=marker diff --git a/src/wrap_helpers.hpp b/src/wrap_helpers.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4a2d1ee99e8fd044897e9791680ffc1a5c139222 --- 
/dev/null +++ b/src/wrap_helpers.hpp @@ -0,0 +1,163 @@ +#ifndef PYCUDA_WRAP_HELPERS_HEADER_SEEN +#define PYCUDA_WRAP_HELPERS_HEADER_SEEN + + +#include +#include + + +namespace py = pybind11; + + +#define PYTHON_ERROR(TYPE, REASON) \ +{ \ + PyErr_SetString(PyExc_##TYPE, REASON); \ + throw boost::python::error_already_set(); \ +} + +#define ENUM_VALUE(NAME) \ + value(#NAME, NAME) + +#define DEF_SIMPLE_METHOD(NAME) \ + def(#NAME, &cls::NAME) + +#define DEF_SIMPLE_STATIC_METHOD(NAME) \ + def_static(#NAME, &cls::NAME) + +#define DEF_SIMPLE_METHOD_WITH_ARGS(NAME, ARGS) \ + def(#NAME, &cls::NAME, boost::python::args ARGS) + +#define DEF_SIMPLE_FUNCTION(NAME) \ + m.def(#NAME, &NAME) + +#define DEF_SIMPLE_FUNCTION_WITH_ARGS(NAME, ARGS) \ + m.def(#NAME, &NAME, py::args ARGS) + +#define DEF_SIMPLE_RO_MEMBER(NAME) \ + def_readonly(#NAME, &cls::m_##NAME) + +#define DEF_SIMPLE_RW_MEMBER(NAME) \ + def_readwrite(#NAME, &cls::m_##NAME) + +#define COPY_PY_LIST(TYPE, NAME) \ + { \ + for (auto it: py_##NAME) \ + NAME.push_back(it.cast()); \ + } + +#define COPY_PY_COORD_TRIPLE(NAME) \ + size_t NAME[3] = {0, 0, 0}; \ + { \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ + if (my_len > 3) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py_tup_##NAME[i].cast(); \ + } + +#define COPY_PY_PITCH_TUPLE(NAME) \ + size_t NAME[2] = {0, 0}; \ + if (py_##NAME.ptr() != Py_None) \ + { \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ + if (my_len > 2) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py_tup_##NAME[i].cast(); \ + } + +#define COPY_PY_REGION_TRIPLE(NAME) \ + size_t NAME[3] = {1, 1, 1}; \ + { \ + py::tuple py_tup_##NAME = py_##NAME; \ + size_t my_len = len(py_tup_##NAME); \ + if (my_len > 3) \ + throw error("transfer", CL_INVALID_VALUE, #NAME "has too 
many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py_tup_##NAME[i].cast(); \ + } + +#define PYOPENCL_PARSE_NUMPY_ARRAY_SPEC \ + PyArray_Descr *tp_descr; \ + if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED) \ + throw py::error_already_set(); \ + \ + std::vector shape; \ + try \ + { \ + shape.push_back(py_shape.cast()); \ + } \ + catch (py::cast_error &) \ + { \ + COPY_PY_LIST(npy_intp, shape); \ + } \ + \ + NPY_ORDER order = PyArray_CORDER; \ + PyArray_OrderConverter(py_order.ptr(), &order); \ + \ + int ary_flags = 0; \ + if (order == PyArray_FORTRANORDER) \ + ary_flags |= NPY_FARRAY; \ + else if (order == PyArray_CORDER) \ + ary_flags |= NPY_CARRAY; \ + else \ + throw std::runtime_error("unrecognized order specifier"); \ + \ + std::vector strides; \ + if (py_strides.ptr() != Py_None) \ + { \ + COPY_PY_LIST(npy_intp, strides); \ + } + +#define PYOPENCL_RETURN_VECTOR(ITEMTYPE, NAME) \ + { \ + py::list pyopencl_result; \ + for (ITEMTYPE item: NAME) \ + pyopencl_result.append(item); \ + return pyopencl_result; \ + } + +namespace +{ + template + inline py::object handle_from_new_ptr(T *ptr) + { + return py::cast(ptr, py::return_value_policy::take_ownership); + } + + template + inline T *from_int_ptr(intptr_t obj_ref, bool retain) + { + ClType clobj = (ClType) obj_ref; + return new T(clobj, retain); + } + + template + inline intptr_t to_int_ptr(T const &obj) + { + return (intptr_t) obj.data(); + } +} + +#define PYOPENCL_EXPOSE_TO_FROM_INT_PTR(CL_TYPENAME) \ + .def_static("from_int_ptr", from_int_ptr, \ + py::arg("int_ptr_value"), \ + py::arg("retain")=true, \ + "(static method) Return a new Python object referencing the C-level " \ + ":c:type:`" #CL_TYPENAME "` object at the location pointed to " \ + "by *int_ptr_value*. The relevant :c:func:`clRetain*` function " \ + "will be called if *retain* is True." 
\ + "If the previous owner of the object will *not* release the reference, " \ + "*retain* should be set to *False*, to effectively transfer ownership to " \ + ":mod:`pyopencl`." \ + "\n\n.. versionadded:: 2013.2\n" \ + "\n\n.. versionchanged:: 2016.1\n\n *retain* added.") \ + .def_property_readonly("int_ptr", to_int_ptr, \ + "Return an integer corresponding to the pointer value " \ + "of the underlying :c:type:`" #CL_TYPENAME "`. " \ + "Use :meth:`from_int_ptr` to turn back into a Python object." \ + "\n\n.. versionadded:: 2013.2\n") \ + +#endif diff --git a/src/wrap_mempool.cpp b/src/wrap_mempool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a15efeb64c848c4e5e3928c98a5fb9186749e12b --- /dev/null +++ b/src/wrap_mempool.cpp @@ -0,0 +1,288 @@ +// Gregor Thalhammer (on Apr 13, 2011) said it's necessary to import Python.h +// first to prevent OS X from overriding a bunch of macros. (e.g. isspace) +#include + +#include +#include +#include "wrap_helpers.hpp" +#include "wrap_cl.hpp" +#include "mempool.hpp" +#include "tools.hpp" + + + +namespace +{ + class cl_allocator_base + { + protected: + std::shared_ptr m_context; + cl_mem_flags m_flags; + + public: + cl_allocator_base(std::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : m_context(ctx), m_flags(flags) + { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + throw pyopencl::error("Allocator", CL_INVALID_VALUE, + "cannot specify USE_HOST_PTR or COPY_HOST_PTR flags"); + } + + cl_allocator_base(cl_allocator_base const &src) + : m_context(src.m_context), m_flags(src.m_flags) + { } + + virtual ~cl_allocator_base() + { } + + typedef cl_mem pointer_type; + typedef size_t size_type; + + virtual cl_allocator_base *copy() const = 0; + virtual bool is_deferred() const = 0; + virtual pointer_type allocate(size_type s) = 0; + + void free(pointer_type p) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (p)); + } + + void try_release_blocks() + { + pyopencl::run_python_gc(); + } 
+ }; + + class cl_deferred_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + + public: + cl_deferred_allocator(std::shared_ptr const &ctx, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(ctx, flags) + { } + + cl_allocator_base *copy() const + { + return new cl_deferred_allocator(*this); + } + + bool is_deferred() const + { return true; } + + pointer_type allocate(size_type s) + { + return pyopencl::create_buffer(m_context->data(), m_flags, s, 0); + } + }; + // Static-storage source word for the non-blocking probe write in cl_immediate_allocator::allocate(). + const unsigned zero = 0; + + class cl_immediate_allocator : public cl_allocator_base + { + private: + typedef cl_allocator_base super; + pyopencl::command_queue m_queue; + + public: + cl_immediate_allocator(pyopencl::command_queue &queue, + cl_mem_flags flags=CL_MEM_READ_WRITE) + : super(std::shared_ptr(queue.get_context()), flags), + m_queue(queue.data(), /*retain*/ true) + { } + + cl_immediate_allocator(cl_immediate_allocator const &src) + : super(src), m_queue(src.m_queue) + { } + + cl_allocator_base *copy() const + { + return new cl_immediate_allocator(*this); + } + + bool is_deferred() const + { return false; } + + pointer_type allocate(size_type s) + { + pointer_type ptr = pyopencl::create_buffer( + m_context->data(), m_flags, s, 0); + + // Make sure the buffer gets allocated right here and right now. + // This looks (and is) expensive. But immediate allocators + // have their main use in memory pools, whose basic assumption + // is that allocation is too expensive anyway--but they rely + // on exact 'out-of-memory' information. + try { // &zero is the namespace-scope constant: a non-blocking write must not read from this stack frame + PYOPENCL_CALL_GUARDED(clEnqueueWriteBuffer, ( + m_queue.data(), + ptr, + /* is blocking */ CL_FALSE, + 0, std::min(s, sizeof(zero)), &zero, + 0, NULL, NULL + )); + } catch (...) { clReleaseMemObject(ptr); throw; } // the caller never receives ptr on failure, so release it here + // No need to wait for completion here. clWaitForEvents (e.g.) + // cannot return mem object allocation failures. This implies that + // the buffer is faulted onto the device on enqueue. 
+ + return ptr; + } + }; + + + + + inline + pyopencl::buffer *allocator_call(cl_allocator_base &alloc, size_t size) + { + cl_mem mem; + int try_count = 0; + while (try_count < 2) + { + try + { + mem = alloc.allocate(size); + break; + } + catch (pyopencl::error &e) + { + if (!e.is_out_of_memory()) + throw; + if (++try_count == 2) + throw; + } + + alloc.try_release_blocks(); + } + + try + { + return new pyopencl::buffer(mem, false); + } + catch (...) + { + PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); + throw; + } + } + + + + + class pooled_buffer + : public pyopencl::pooled_allocation >, + public pyopencl::memory_object_holder + { + private: + typedef + pyopencl::pooled_allocation > + super; + + public: + pooled_buffer( + std::shared_ptr p, super::size_type s) + : super(p, s) + { } + + const super::pointer_type data() const + { return ptr(); } + }; + + + + + pooled_buffer *device_pool_allocate( + std::shared_ptr > pool, + pyopencl::memory_pool::size_type sz) + { + return new pooled_buffer(pool, sz); + } + + + + + template + void expose_memory_pool(Wrapper &wrapper) + { + typedef typename Wrapper::type cls; + wrapper + .def_property_readonly("held_blocks", &cls::held_blocks) + .def_property_readonly("active_blocks", &cls::active_blocks) + .DEF_SIMPLE_STATIC_METHOD(bin_number) + .DEF_SIMPLE_STATIC_METHOD(alloc_size) + .DEF_SIMPLE_METHOD(free_held) + .DEF_SIMPLE_METHOD(stop_holding) + ; + } +} + + + + +void pyopencl_expose_mempool(py::module &m) +{ + m.def("bitlog2", pyopencl::bitlog2); + + { + typedef cl_allocator_base cls; + py::class_ wrapper( + m, "_tools_AllocatorBase"/*, py::no_init */); + wrapper + .def("__call__", allocator_call) + ; + + } + + { + typedef cl_deferred_allocator cls; + py::class_ wrapper( + m, "_tools_DeferredAllocator"); + wrapper + .def(py::init< + std::shared_ptr const &>()) + .def(py::init< + std::shared_ptr const &, + cl_mem_flags>()) + ; + } + + { + typedef cl_immediate_allocator cls; + py::class_ wrapper( + m, 
"_tools_ImmediateAllocator"); + wrapper + .def(py::init()) + .def(py::init()) + ; + } + + { + typedef pyopencl::memory_pool cls; + + py::class_< + cls, /* boost::noncopyable, */ + std::shared_ptr> wrapper( m, "MemoryPool"); + wrapper + .def(py::init()) + .def("allocate", device_pool_allocate) + .def("__call__", device_pool_allocate) + // undoc for now + .DEF_SIMPLE_METHOD(set_trace) + ; + + expose_memory_pool(wrapper); + } + + { + typedef pooled_buffer cls; + py::class_( + m, "PooledBuffer"/* , py::no_init */) + .def("release", &cls::free) + ; + } +} diff --git a/test/test_array.py b/test/test_array.py index 05008c169ae782a49b5b985c7a79780e337c5770..3e74bcf0e2bc3c5c56ebfbb971164d89fcc49a35 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -37,10 +37,11 @@ import pyopencl.tools as cl_tools from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests) from pyopencl.characterize import has_double_support, has_struct_arg_count_bug -from pyopencl.cffi_cl import _PYPY from pyopencl.clrandom import RanluxGenerator, PhiloxGenerator, ThreefryGenerator +_PYPY = cl._PYPY + # {{{ helpers @@ -580,7 +581,7 @@ def test_bitwise(ctx_factory): @pytest.mark.parametrize("rng_class", [RanluxGenerator, PhiloxGenerator, ThreefryGenerator]) -@pytest.mark.parametrize("ary_size", [300, 301, 302, 303, 10007]) +@pytest.mark.parametrize("ary_size", [300, 301, 302, 303, 10007, 1000000]) def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False): context = ctx_factory() queue = cl.CommandQueue(context) @@ -605,16 +606,22 @@ def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False pt.hist(ran.get(), 30) pt.show() - assert (0 < ran.get()).all() - assert (ran.get() < 1).all() + assert (0 <= ran.get()).all() + assert (ran.get() <= 1).all() if rng_class is RanluxGenerator: gen.synchronize(queue) ran = cl_array.zeros(queue, ary_size, dtype) gen.fill_uniform(ran, a=4, b=7) - assert (4 < ran.get()).all() - assert 
(ran.get() < 7).all() + ran_host = ran.get() + + for cond in [4 <= ran_host, ran_host <= 7]: + good = cond.all() + if not good: + print(np.where(~cond)) + print(ran_host[~cond]) + assert good ran = gen.normal(queue, ary_size, dtype, mu=10, sigma=3) diff --git a/test/test_wrapper.py b/test/test_wrapper.py index a17866fa77110f4d1b232898ca46c76f54ec4a83..4d729642163b0bebc63f2bd356a6e7ff00868ab7 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -295,7 +295,9 @@ def test_image_format_constructor(): assert iform.channel_order == cl.channel_order.RGBA assert iform.channel_data_type == cl.channel_type.FLOAT - assert not iform.__dict__ + + if not cl._PYPY: + assert not hasattr(iform, "__dict__") def test_device_topology_amd_constructor(): @@ -306,7 +308,8 @@ def test_device_topology_amd_constructor(): assert topol.device == 4 assert topol.function == 5 - assert not topol.__dict__ + if not cl._PYPY: + assert not hasattr(topol, "__dict__") def test_nonempty_supported_image_formats(ctx_factory): @@ -351,7 +354,7 @@ def test_that_python_args_fail(ctx_factory): prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3)) a_result = np.empty_like(a) - cl.enqueue_read_buffer(queue, a_buf, a_result).wait() + cl.enqueue_copy(queue, a_buf, a_result).wait() def test_image_2d(ctx_factory): @@ -513,8 +516,8 @@ def test_copy_buffer(ctx_factory): buf1 = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) buf2 = cl.Buffer(context, mf.WRITE_ONLY, b.nbytes) - cl.enqueue_copy_buffer(queue, buf1, buf2).wait() - cl.enqueue_read_buffer(queue, buf2, b).wait() + cl.enqueue_copy(queue, buf2, buf1).wait() + cl.enqueue_copy(queue, b, buf2).wait() assert la.norm(a - b) == 0 @@ -569,7 +572,7 @@ def test_vector_args(ctx_factory): prg.set_vec(queue, dest.shape, None, x, dest_buf) - cl.enqueue_read_buffer(queue, dest_buf, dest).wait() + cl.enqueue_copy(queue, dest, dest_buf).wait() assert (dest == x).all() @@ -665,36 +668,6 @@ def test_unload_compiler(platform): 
cl.unload_platform_compiler(platform) -def test_enqueue_task(ctx_factory): - ctx = ctx_factory() - queue = cl.CommandQueue(ctx) - mf = cl.mem_flags - - prg = cl.Program(ctx, """ - __kernel void - reverse(__global const float *in, __global float *out, int n) - { - for (int i = 0;i < n;i++) { - out[i] = in[n - 1 - i]; - } - } - """).build() - knl = prg.reverse - - n = 100 - a = np.random.rand(n).astype(np.float32) - b = np.empty_like(a) - - buf1 = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) - buf2 = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) - - knl.set_args(buf1, buf2, np.int32(n)) - cl.enqueue_task(queue, knl) - - cl.enqueue_copy(queue, b, buf2).wait() - assert la.norm(a[::-1] - b) == 0 - - def test_platform_get_devices(ctx_factory): ctx = ctx_factory() platform = ctx.devices[0].platform @@ -768,6 +741,10 @@ def test_user_event(ctx_factory): def test_buffer_get_host_array(ctx_factory): + if cl._PYPY: + # FIXME + pytest.xfail("Buffer.get_host_array not yet working on pypy") + ctx = ctx_factory() mf = cl.mem_flags @@ -823,7 +800,7 @@ def test_event_set_callback(ctx_factory): queue = cl.CommandQueue(ctx) if ctx._get_cl_version() < (1, 1): - pytest.skip("OpenCL 1.1 or newer required fro set_callback") + pytest.skip("OpenCL 1.1 or newer required for set_callback") a_np = np.random.rand(50000).astype(np.float32) b_np = np.random.rand(50000).astype(np.float32) @@ -857,9 +834,17 @@ def test_event_set_callback(ctx_factory): queue.finish() + counter = 0 + # yuck - from time import sleep - sleep(0.1) + while not got_called: + from time import sleep + sleep(0.01) + + # wait up to five seconds (?!) 
+ counter += 1 + if counter >= 500: + break assert got_called @@ -952,18 +937,10 @@ def test_coarse_grain_svm(ctx_factory): dev = ctx.devices[0] - has_svm = (ctx._get_cl_version() >= (2, 0) and - ctx.devices[0]._get_cl_version() >= (2, 0) and - cl.get_cl_header_version() >= (2, 0)) - - if dev.platform.name == "Portable Computing Language": - has_svm = ( - get_pocl_version(dev.platform) >= (1, 0) - and cl.get_cl_header_version() >= (2, 0)) - - if not has_svm: - from pytest import skip - skip("SVM only available in OpenCL 2.0 and higher") + from pyopencl.characterize import has_coarse_grain_buffer_svm + from pytest import skip + if not has_coarse_grain_buffer_svm(queue.device): + skip("device does not support coarse-grain SVM") if ("AMD" in dev.platform.name and dev.type & cl.device_type.CPU): @@ -1012,13 +989,9 @@ def test_fine_grain_svm(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) + from pyopencl.characterize import has_fine_grain_buffer_svm from pytest import skip - if (ctx._get_cl_version() < (2, 0) or - cl.get_cl_header_version() < (2, 0)): - skip("SVM only available in OpenCL 2.0 and higher") - - if not (ctx.devices[0].svm_capabilities - & cl.device_svm_capabilities.FINE_GRAIN_BUFFER): + if not has_fine_grain_buffer_svm(queue.device): skip("device does not support fine-grain SVM") n = 3000 @@ -1050,6 +1023,10 @@ def test_fine_grain_svm(ctx_factory): cl.cltypes.uint2, ]) def test_map_dtype(ctx_factory, dtype): + if cl._PYPY: + # FIXME + pytest.xfail("enqueue_map_buffer not yet working on pypy") + ctx = ctx_factory() queue = cl.CommandQueue(ctx)