Skip to content
Snippets Groups Projects
wrap_cl.hpp 122 KiB
Newer Older
#ifndef _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP
#define _AFJHAYYTA_PYOPENCL_HEADER_SEEN_WRAP_CL_HPP

// CL 1.2 undecided:
// clSetPrintfCallback

// {{{ includes

#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
// #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#ifdef __APPLE__

// Mac ------------------------------------------------------------------------
#include <OpenCL/opencl.h>
#ifdef HAVE_GL

#define PYOPENCL_GL_SHARING_VERSION 1

#include <OpenGL/OpenGL.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#endif

#else

// elsewhere ------------------------------------------------------------------
#define CL_TARGET_OPENCL_VERSION 220

#include <CL/cl.h>
#include "pyopencl_ext.h"

#if defined(_WIN32)
#define NOMINMAX
#include <windows.h>
#endif

#ifdef HAVE_GL
#include <GL/gl.h>
#include <CL/cl_gl.h>
#endif

#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
#define PYOPENCL_GL_SHARING_VERSION cl_khr_gl_sharing
#endif

#endif

#include <stdexcept>
#include <iostream>
#include <vector>
#include <utility>
#include <numeric>
#include "wrap_helpers.hpp"
#include "numpy_init.hpp"
#include "tools.hpp"

#ifdef PYOPENCL_PRETEND_CL_VERSION
#define PYOPENCL_CL_VERSION PYOPENCL_PRETEND_CL_VERSION
#else

#if defined(CL_VERSION_2_2)
#define PYOPENCL_CL_VERSION 0x2020
#elif defined(CL_VERSION_2_1)
#define PYOPENCL_CL_VERSION 0x2010
#elif defined(CL_VERSION_2_0)
#define PYOPENCL_CL_VERSION 0x2000
#elif defined(CL_VERSION_1_2)
#define PYOPENCL_CL_VERSION 0x1020
#elif defined(CL_VERSION_1_1)
#define PYOPENCL_CL_VERSION 0x1010
#else
#define PYOPENCL_CL_VERSION 0x1000
#endif

#endif


#if PY_VERSION_HEX >= 0x03000000
#define PYOPENCL_USE_NEW_BUFFER_INTERFACE
#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) std::move(s)
#else
#define PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(s) (s)
// }}}





// {{{ tools
#if PY_VERSION_HEX >= 0x02050000
  typedef Py_ssize_t PYOPENCL_BUFFER_SIZE_T;
#else
  typedef int PYOPENCL_BUFFER_SIZE_T;
#endif

#define PYOPENCL_CAST_BOOL(B) ((B) ? CL_TRUE : CL_FALSE)





#define PYOPENCL_DEPRECATED(WHAT, KILL_VERSION, EXTRA_MSG) \
  { \
    PyErr_Warn( \
        PyExc_DeprecationWarning, \
        WHAT " is deprecated and will stop working in PyOpenCL " KILL_VERSION". " \
        EXTRA_MSG); \
  }

#if PYOPENCL_CL_VERSION >= 0x1020

#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \
    NAME##_fn VAR \
      = (NAME##_fn) \
      clGetExtensionFunctionAddressForPlatform(PLATFORM, #NAME); \
    \
    if (!VAR) \
      throw error(#NAME, CL_INVALID_VALUE, #NAME \
          "not available");

#else

#define PYOPENCL_GET_EXT_FUN(PLATFORM, NAME, VAR) \
    NAME##_fn VAR \
      = (NAME##_fn) \
      clGetExtensionFunctionAddress(#NAME); \
    \
    if (!VAR) \
      throw error(#NAME, CL_INVALID_VALUE, #NAME \
          "not available");

#endif


#define PYOPENCL_PARSE_PY_DEVICES \
    std::vector<cl_device_id> devices_vec; \
    cl_uint num_devices; \
    cl_device_id *devices; \
    \
    if (py_devices.ptr() == Py_None) \
    { \
      num_devices = 0; \
      devices = 0; \
    } \
    else \
    { \
      for (py::handle py_dev: py_devices) \
        devices_vec.push_back( \
            (py_dev).cast<device &>().data()); \
      num_devices = devices_vec.size(); \
Andreas Klöckner's avatar
Andreas Klöckner committed
      devices = devices_vec.empty( ) ? nullptr : &devices_vec.front(); \
    } \


#define PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(OPERATION) \
    try \
    { \
      OPERATION \
    } \
    catch (pyopencl::error &e) \
    { \
      if (!e.is_out_of_memory()) \
        throw; \
    } \
    \
    /* If we get here, we got an error from CL.
     * We should run the Python GC to try and free up
     * some memory references. */ \
    run_python_gc(); \
    \
    /* Now retry the allocation. If it fails again,
     * let it fail. */ \
    { \
      OPERATION \
    }




#define PYOPENCL_RETRY_IF_MEM_ERROR(OPERATION) \
  { \
    bool failed_with_mem_error = false; \
    try \
    { \
      OPERATION \
    } \
    catch (pyopencl::error &e) \
    { \
      failed_with_mem_error = true; \
      if (!e.is_out_of_memory()) \
        throw; \
    } \
    \
    if (failed_with_mem_error) \
    { \
      /* If we get here, we got an error from CL.
       * We should run the Python GC to try and free up
       * some memory references. */ \
      run_python_gc(); \
      \
      /* Now retry the allocation. If it fails again,
       * let it fail. */ \
      { \
        OPERATION \
      } \
    } \
  }

// }}}

// {{{ tracing and error reporting
#ifdef PYOPENCL_TRACE
  #define PYOPENCL_PRINT_CALL_TRACE(NAME) \
    std::cerr << NAME << std::endl;
  #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) \
    std::cerr << NAME << " (" << EXTRA_INFO << ')' << std::endl;
#else
  #define PYOPENCL_PRINT_CALL_TRACE(NAME) /*nothing*/
  #define PYOPENCL_PRINT_CALL_TRACE_INFO(NAME, EXTRA_INFO) /*nothing*/
#endif

#define PYOPENCL_CALL_GUARDED_THREADED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    cl_int status_code; \
    { \
      py::gil_scoped_release release; \
      status_code = NAME ARGLIST; \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#NAME, status_code);\
  }

#define PYOPENCL_CALL_GUARDED_WITH_TRACE_INFO(NAME, ARGLIST, TRACE_INFO) \
  { \
    PYOPENCL_PRINT_CALL_TRACE_INFO(#NAME, TRACE_INFO); \
    cl_int status_code; \
    status_code = NAME ARGLIST; \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#NAME, status_code);\
  }

#define PYOPENCL_CALL_GUARDED_THREADED(NAME, ARGLIST) \
  { \
    PYOPENCL_PRINT_CALL_TRACE(#NAME); \
    cl_int status_code; \
    { \
      py::gil_scoped_release release; \
      status_code = NAME ARGLIST; \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#NAME, status_code);\
  }

#define PYOPENCL_CALL_GUARDED(NAME, ARGLIST) \
  { \
    PYOPENCL_PRINT_CALL_TRACE(#NAME); \
    cl_int status_code; \
    status_code = NAME ARGLIST; \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#NAME, status_code);\
  }
#define PYOPENCL_CALL_GUARDED_CLEANUP(NAME, ARGLIST) \
  { \
    PYOPENCL_PRINT_CALL_TRACE(#NAME); \
    cl_int status_code; \
    status_code = NAME ARGLIST; \
    if (status_code != CL_SUCCESS) \
      std::cerr \
        << "PyOpenCL WARNING: a clean-up operation failed (dead context maybe?)" \
        << std::endl \
        << #NAME " failed with code " << status_code \
        << std::endl; \
  }

// }}}

// {{{ get_info helpers
#define PYOPENCL_GET_OPAQUE_INFO(WHAT, FIRST_ARG, SECOND_ARG, CL_TYPE, TYPE) \
  { \
    CL_TYPE param_value; \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
          (FIRST_ARG, SECOND_ARG, sizeof(param_value), &param_value, 0)); \
    if (param_value) \
      return py::object(handle_from_new_ptr( \
            new TYPE(param_value, /*retain*/ true))); \
    else \
      return py::none(); \
  }

#define PYOPENCL_GET_VEC_INFO(WHAT, FIRST_ARG, SECOND_ARG, RES_VEC) \
  { \
    size_t size; \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
        (FIRST_ARG, SECOND_ARG, 0, 0, &size)); \
    \
    RES_VEC.resize(size / sizeof(RES_VEC.front())); \
    \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
        (FIRST_ARG, SECOND_ARG, size, \
Andreas Klöckner's avatar
Andreas Klöckner committed
         RES_VEC.empty( ) ? nullptr : &RES_VEC.front(), &size)); \
  }

#define PYOPENCL_GET_STR_INFO(WHAT, FIRST_ARG, SECOND_ARG) \
  { \
    size_t param_value_size; \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
        (FIRST_ARG, SECOND_ARG, 0, 0, &param_value_size)); \
    \
    std::vector<char> param_value(param_value_size); \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
        (FIRST_ARG, SECOND_ARG, param_value_size,  \
Andreas Klöckner's avatar
Andreas Klöckner committed
         param_value.empty( ) ? nullptr : &param_value.front(), &param_value_size)); \
    return py::cast( \
        param_value.empty( ) ? "" : std::string(&param_value.front(), param_value_size-1)); \
  }




#define PYOPENCL_GET_INTEGRAL_INFO(WHAT, FIRST_ARG, SECOND_ARG, TYPE) \
  { \
    TYPE param_value; \
    PYOPENCL_CALL_GUARDED(clGet##WHAT##Info, \
        (FIRST_ARG, SECOND_ARG, sizeof(param_value), &param_value, 0)); \
    return py::cast(param_value); \
  }

// }}}

// {{{ event helpers --------------------------------------------------------------
#define PYOPENCL_PARSE_WAIT_FOR \
    cl_uint num_events_in_wait_list = 0; \
    std::vector<cl_event> event_wait_list; \
    \
    if (py_wait_for.ptr() != Py_None) \
    { \
      event_wait_list.resize(len(py_wait_for)); \
Andreas Klöckner's avatar
Andreas Klöckner committed
      for (py::handle evt: py_wait_for) \
        event_wait_list[num_events_in_wait_list++] = \
          evt.cast<const event &>().data(); \
    }

#define PYOPENCL_WAITLIST_ARGS \
Andreas Klöckner's avatar
Andreas Klöckner committed
    num_events_in_wait_list, event_wait_list.empty( ) ? nullptr : &event_wait_list.front()

#define PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, obj) \
    try \
    { \
      return new nanny_event(evt, false, obj); \
    } \
    catch (...) \
    { \
      clReleaseEvent(evt); \
      throw; \
    }

#define PYOPENCL_RETURN_NEW_EVENT(evt) \
    try \
    { \
      return new event(evt, false); \
    } \
    catch (...) \
    { \
      clReleaseEvent(evt); \
      throw; \
    }

// }}}

// {{{ equality testing
#define PYOPENCL_EQUALITY_TESTS(cls) \
    bool operator==(cls const &other) const \
    { return data() == other.data(); } \
    bool operator!=(cls const &other) const \
    { return data() != other.data(); } \
    long hash() const \
    { return (long) (intptr_t) data(); }
// }}}



namespace pyopencl
{
  // {{{ error
  class error : public std::runtime_error
  {
    private:
      const char *m_routine;
      cl_int m_code;

    public:
      error(const char *rout, cl_int c, const char *msg="")
        : std::runtime_error(msg), m_routine(rout), m_code(c)
      { }

      const char *routine() const
      {
        return m_routine;
      }

      cl_int code() const
      {
        return m_code;
      }

      bool is_out_of_memory() const
      {
        return (code() == CL_MEM_OBJECT_ALLOCATION_FAILURE
            || code() == CL_OUT_OF_RESOURCES
            || code() == CL_OUT_OF_HOST_MEMORY);
      }

  };

  // }}}


  // {{{ buffer interface helper
  //
#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
  class py_buffer_wrapper : public noncopyable
  {
    private:
      bool m_initialized;

    public:
      Py_buffer m_buf;

    py_buffer_wrapper()
      : m_initialized(false)
    {}

    void get(PyObject *obj, int flags)
    {
      if (PyObject_GetBuffer(obj, &m_buf, flags))
        throw py::error_already_set();

      m_initialized = true;
    }

    virtual ~py_buffer_wrapper()
    {
      if (m_initialized)
        PyBuffer_Release(&m_buf);
    }
  };
#endif

  // }}}

  inline
  py::tuple get_cl_header_version()
  {
    return py::make_tuple(
        PYOPENCL_CL_VERSION >> (3*4),
        (PYOPENCL_CL_VERSION >> (1*4)) & 0xff
        );
  }


  // {{{ platform

  class platform : noncopyable
  {
    private:
      cl_platform_id m_platform;

    public:
      platform(cl_platform_id pid)
      : m_platform(pid)
      { }

      platform(cl_platform_id pid, bool /*retain (ignored)*/)
      : m_platform(pid)
      { }

      cl_platform_id data() const
      {
        return m_platform;
      }

      PYOPENCL_EQUALITY_TESTS(platform);

      py::object get_info(cl_platform_info param_name) const
      {
        switch (param_name)
        {
          case CL_PLATFORM_PROFILE:
          case CL_PLATFORM_VERSION:
          case CL_PLATFORM_NAME:
          case CL_PLATFORM_VENDOR:
#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001)
          case CL_PLATFORM_EXTENSIONS:
#endif
            PYOPENCL_GET_STR_INFO(Platform, m_platform, param_name);

          default:
            throw error("Platform.get_info", CL_INVALID_VALUE);
        }
      }

      py::list get_devices(cl_device_type devtype);
  };




  inline
  py::list get_platforms()
  {
    cl_uint num_platforms = 0;
    PYOPENCL_CALL_GUARDED(clGetPlatformIDs, (0, 0, &num_platforms));

    std::vector<cl_platform_id> platforms(num_platforms);
    PYOPENCL_CALL_GUARDED(clGetPlatformIDs,
Andreas Klöckner's avatar
Andreas Klöckner committed
        (num_platforms, platforms.empty( ) ? nullptr : &platforms.front(), &num_platforms));

    py::list result;
    for (cl_platform_id pid: platforms)
      result.append(handle_from_new_ptr(
            new platform(pid)));

    return result;
  }

  // }}}

  // {{{ device

  class device : noncopyable
  {
    public:
      enum reference_type_t {
        REF_NOT_OWNABLE,
#if PYOPENCL_CL_VERSION >= 0x1020
        REF_CL_1_2,
#endif
      };
    private:
      cl_device_id m_device;
      reference_type_t m_ref_type;

    public:
      device(cl_device_id did)
      : m_device(did), m_ref_type(REF_NOT_OWNABLE)
      { }

      device(cl_device_id did, bool retain, reference_type_t ref_type=REF_NOT_OWNABLE)
      : m_device(did), m_ref_type(ref_type)
      {
        if (retain && ref_type != REF_NOT_OWNABLE)
        {
          if (false)
          { }

#if PYOPENCL_CL_VERSION >= 0x1020
          else if (ref_type == REF_CL_1_2)
          {
            PYOPENCL_CALL_GUARDED(clRetainDevice, (did));
          }
#endif

          else
            throw error("Device", CL_INVALID_VALUE,
                "cannot own references to devices when device fission or CL 1.2 is not available");
        }
      }

      ~device()
      {
#if PYOPENCL_CL_VERSION >= 0x1020
          PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseDevice, (m_device));
#endif
      }

      cl_device_id data() const
      {
        return m_device;
      }

      PYOPENCL_EQUALITY_TESTS(device);

      py::object get_info(cl_device_info param_name) const
      {
#define DEV_GET_INT_INF(TYPE) \
        PYOPENCL_GET_INTEGRAL_INFO(Device, m_device, param_name, TYPE);

        switch (param_name)
        {
          case CL_DEVICE_TYPE: DEV_GET_INT_INF(cl_device_type);
          case CL_DEVICE_VENDOR_ID: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_COMPUTE_UNITS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_WORK_GROUP_SIZE: DEV_GET_INT_INF(size_t);

          case CL_DEVICE_MAX_WORK_ITEM_SIZES:
            {
              std::vector<size_t> result;
              PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
              PYOPENCL_RETURN_VECTOR(size_t, result);
            }

          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint);

          case CL_DEVICE_MAX_CLOCK_FREQUENCY: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_ADDRESS_BITS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_READ_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_MEM_ALLOC_SIZE: DEV_GET_INT_INF(cl_ulong);
          case CL_DEVICE_IMAGE2D_MAX_WIDTH: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE2D_MAX_HEIGHT: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE3D_MAX_WIDTH: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE3D_MAX_HEIGHT: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE3D_MAX_DEPTH: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE_SUPPORT: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_MAX_PARAMETER_SIZE: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_MAX_SAMPLERS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MEM_BASE_ADDR_ALIGN: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_SINGLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config);
#ifdef CL_DEVICE_DOUBLE_FP_CONFIG
          case CL_DEVICE_DOUBLE_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config);
#endif
#ifdef CL_DEVICE_HALF_FP_CONFIG
          case CL_DEVICE_HALF_FP_CONFIG: DEV_GET_INT_INF(cl_device_fp_config);
#endif

          case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: DEV_GET_INT_INF(cl_device_mem_cache_type);
          case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: DEV_GET_INT_INF(cl_ulong);
          case CL_DEVICE_GLOBAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong);

          case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: DEV_GET_INT_INF(cl_ulong);
          case CL_DEVICE_MAX_CONSTANT_ARGS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_LOCAL_MEM_TYPE: DEV_GET_INT_INF(cl_device_local_mem_type);
          case CL_DEVICE_LOCAL_MEM_SIZE: DEV_GET_INT_INF(cl_ulong);
          case CL_DEVICE_ERROR_CORRECTION_SUPPORT: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_PROFILING_TIMER_RESOLUTION: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_ENDIAN_LITTLE: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_AVAILABLE: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_COMPILER_AVAILABLE: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_EXECUTION_CAPABILITIES: DEV_GET_INT_INF(cl_device_exec_capabilities);
Andreas Klöckner's avatar
Andreas Klöckner committed
#if PYOPENCL_CL_VERSION >= 0x2000
          case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties);
#else
          case CL_DEVICE_QUEUE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties);
Andreas Klöckner's avatar
Andreas Klöckner committed
#endif

          case CL_DEVICE_NAME:
          case CL_DEVICE_VENDOR:
          case CL_DRIVER_VERSION:
          case CL_DEVICE_PROFILE:
          case CL_DEVICE_VERSION:
          case CL_DEVICE_EXTENSIONS:
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);

          case CL_DEVICE_PLATFORM:
            PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_platform_id, platform);

#if PYOPENCL_CL_VERSION >= 0x1010
          case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint);

          case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: DEV_GET_INT_INF(cl_uint);

          case CL_DEVICE_HOST_UNIFIED_MEMORY: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_OPENCL_C_VERSION:
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
#endif
#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
          case CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV:
          case CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV:
          case CL_DEVICE_REGISTERS_PER_BLOCK_NV:
          case CL_DEVICE_WARP_SIZE_NV:
            DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_GPU_OVERLAP_NV:
          case CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:
          case CL_DEVICE_INTEGRATED_MEMORY_NV:
            DEV_GET_INT_INF(cl_bool);
#endif
Andreas Klöckner's avatar
Andreas Klöckner committed
#ifdef CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV
          case CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV:
            DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_PCI_BUS_ID_NV
          case CL_DEVICE_PCI_BUS_ID_NV:
            DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_PCI_SLOT_ID_NV
          case CL_DEVICE_PCI_SLOT_ID_NV:
            DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD
          case CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD: DEV_GET_INT_INF(cl_bool);
#endif
#ifdef CL_DEVICE_GFXIP_MAJOR_AMD
          case CL_DEVICE_GFXIP_MAJOR_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_GFXIP_MINOR_AMD
          case CL_DEVICE_GFXIP_MINOR_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD
          case CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#if PYOPENCL_CL_VERSION >= 0x1020
          case CL_DEVICE_LINKER_AVAILABLE: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_BUILT_IN_KERNELS:
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
          case CL_DEVICE_IMAGE_MAX_BUFFER_SIZE: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_IMAGE_MAX_ARRAY_SIZE: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_PARENT_DEVICE:
            PYOPENCL_GET_OPAQUE_INFO(Device, m_device, param_name, cl_device_id, device);
          case CL_DEVICE_PARTITION_MAX_SUB_DEVICES: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PARTITION_TYPE:
          case CL_DEVICE_PARTITION_PROPERTIES:
            {
              std::vector<cl_device_partition_property> result;
              PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
              PYOPENCL_RETURN_VECTOR(cl_device_partition_property, result);
            }
          case CL_DEVICE_PARTITION_AFFINITY_DOMAIN:
            {
              std::vector<cl_device_affinity_domain> result;
              PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
              PYOPENCL_RETURN_VECTOR(cl_device_affinity_domain, result);
            }
          case CL_DEVICE_REFERENCE_COUNT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_INTEROP_USER_SYNC: DEV_GET_INT_INF(cl_bool);
          case CL_DEVICE_PRINTF_BUFFER_SIZE: DEV_GET_INT_INF(cl_bool);
#endif
Andreas Klöckner's avatar
Andreas Klöckner committed
// {{{ AMD dev attrs cl_amd_device_attribute_query
//
// types of AMD dev attrs divined from
// https://www.khronos.org/registry/cl/api/1.2/cl.hpp
#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD
          case CL_DEVICE_PROFILING_TIMER_OFFSET_AMD: DEV_GET_INT_INF(cl_ulong);
#endif
/* FIXME
#ifdef CL_DEVICE_TOPOLOGY_AMD
          case CL_DEVICE_TOPOLOGY_AMD:
#endif
*/
#ifdef CL_DEVICE_BOARD_NAME_AMD
          case CL_DEVICE_BOARD_NAME_AMD: ;
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
#endif
#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
          case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD:
            {
              std::vector<size_t> result;
              PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
              PYOPENCL_RETURN_VECTOR(size_t, result);
            }
#endif
#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
          case CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD
          case CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD
          case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD
          case CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD
          case CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD
          case CL_DEVICE_LOCAL_MEM_BANKS_AMD: DEV_GET_INT_INF(cl_uint);
#endif
// }}}

#ifdef CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT
          case CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT: DEV_GET_INT_INF(cl_uint);
#endif
Andreas Klöckner's avatar
Andreas Klöckner committed
#if PYOPENCL_CL_VERSION >= 0x2000
          case CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES: DEV_GET_INT_INF(cl_command_queue_properties);
          case CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_ON_DEVICE_QUEUES: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_MAX_ON_DEVICE_EVENTS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_SVM_CAPABILITIES: DEV_GET_INT_INF(cl_device_svm_capabilities);
          case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: DEV_GET_INT_INF(size_t);
          case CL_DEVICE_MAX_PIPE_ARGS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PIPE_MAX_PACKET_SIZE: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT: DEV_GET_INT_INF(cl_uint);
#endif
#if PYOPENCL_CL_VERSION >= 0x2010
          case CL_DEVICE_IL_VERSION:
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
          case CL_DEVICE_MAX_NUM_SUB_GROUPS: DEV_GET_INT_INF(cl_uint);
          case CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS: DEV_GET_INT_INF(cl_bool);
Andreas Klöckner's avatar
Andreas Klöckner committed
#endif
#ifdef CL_DEVICE_ME_VERSION_INTEL
          case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM
          case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_PAGE_SIZE_QCOM
          case CL_DEVICE_PAGE_SIZE_QCOM: DEV_GET_INT_INF(cl_uint);
#endif
#ifdef CL_DEVICE_SPIR_VERSIONS
          case CL_DEVICE_SPIR_VERSIONS:
            PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
#endif
#ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA
          case CL_DEVICE_CORE_TEMPERATURE_ALTERA: DEV_GET_INT_INF(cl_int);
#endif

#ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL
          case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL:
            {
              std::vector<cl_uint> result;
              PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
              PYOPENCL_RETURN_VECTOR(cl_uint, result);
            }
#endif
#ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL
          case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: DEV_GET_INT_INF(cl_uint);
#endif

          default:
            throw error("Device.get_info", CL_INVALID_VALUE);
        }
      }

#if PYOPENCL_CL_VERSION >= 0x1020
      py::list create_sub_devices(py::object py_properties)
      {
        std::vector<cl_device_partition_property> properties;

        COPY_PY_LIST(cl_device_partition_property, properties);
        properties.push_back(0);

        cl_device_partition_property *props_ptr
Andreas Klöckner's avatar
Andreas Klöckner committed
          = properties.empty( ) ? nullptr : &properties.front();

        cl_uint num_entries;
        PYOPENCL_CALL_GUARDED(clCreateSubDevices,
Andreas Klöckner's avatar
Andreas Klöckner committed
            (m_device, props_ptr, 0, nullptr, &num_entries));

        std::vector<cl_device_id> result;
        result.resize(num_entries);

        PYOPENCL_CALL_GUARDED(clCreateSubDevices,
Andreas Klöckner's avatar
Andreas Klöckner committed
            (m_device, props_ptr, num_entries, &result.front(), nullptr));

        py::list py_result;
        for (cl_device_id did: result)
          py_result.append(handle_from_new_ptr(
                new pyopencl::device(did, /*retain*/true,
                  device::REF_CL_1_2)));
        return py_result;
      }
#endif

  };




  inline py::list platform::get_devices(cl_device_type devtype)
  {
    cl_uint num_devices = 0;
    PYOPENCL_PRINT_CALL_TRACE("clGetDeviceIDs");
    {
      cl_int status_code;
      status_code = clGetDeviceIDs(m_platform, devtype, 0, 0, &num_devices);
      if (status_code == CL_DEVICE_NOT_FOUND)
        num_devices = 0;
      else if (status_code != CL_SUCCESS) \
        throw pyopencl::error("clGetDeviceIDs", status_code);
    }

    if (num_devices == 0)
      return py::list();

    std::vector<cl_device_id> devices(num_devices);
    PYOPENCL_CALL_GUARDED(clGetDeviceIDs,
        (m_platform, devtype,
Andreas Klöckner's avatar
Andreas Klöckner committed
         num_devices, devices.empty( ) ? nullptr : &devices.front(), &num_devices));

    py::list result;
    for (cl_device_id did: devices)
      result.append(handle_from_new_ptr(
            new device(did)));

    return result;
  }

  // }}}

  // {{{ context

  class context : public noncopyable
  {
    private:
      cl_context m_context;

    public:
      context(cl_context ctx, bool retain)
        : m_context(ctx)
      {
        if (retain)
          PYOPENCL_CALL_GUARDED(clRetainContext, (ctx));
      }

      ~context()
      {
        PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext,
            (m_context));
      }

      cl_context data() const
      {
        return m_context;
      }

      PYOPENCL_EQUALITY_TESTS(context);

      py::object get_info(cl_context_info param_name) const
      {
        switch (param_name)
        {
          case CL_CONTEXT_REFERENCE_COUNT:
            PYOPENCL_GET_INTEGRAL_INFO(
                Context, m_context, param_name, cl_uint);

          case CL_CONTEXT_DEVICES:
            {
              std::vector<cl_device_id> result;
              PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result);

              py::list py_result;
              for (cl_device_id did: result)
                py_result.append(handle_from_new_ptr(
                      new pyopencl::device(did)));
              return py_result;
            }

          case CL_CONTEXT_PROPERTIES:
            {
              std::vector<cl_context_properties> result;
              PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result);

              py::list py_result;
              for (size_t i = 0; i < result.size(); i+=2)
              {
                cl_context_properties key = result[i];
                py::object value;
                switch (key)
                {
                  case CL_CONTEXT_PLATFORM:
                    {
                      value = py::object(
                          handle_from_new_ptr(new platform(
                            reinterpret_cast<cl_platform_id>(result[i+1]))));
                      break;
                    }

#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1)
#if defined(__APPLE__) && defined(HAVE_GL)
                  case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE:
#else
                  case CL_GL_CONTEXT_KHR:
                  case CL_EGL_DISPLAY_KHR:
                  case CL_GLX_DISPLAY_KHR:
                  case CL_WGL_HDC_KHR:
                  case CL_CGL_SHAREGROUP_KHR:
#endif
                    value = py::cast(result[i+1]);
                    break;

#endif
                  case 0:
                    break;

                  default:
                    throw error("Context.get_info", CL_INVALID_VALUE,
                        "unknown context_property key encountered");
                }

                py_result.append(py::make_tuple(result[i], value));
              }
              return py_result;
            }

#if PYOPENCL_CL_VERSION >= 0x1010
          case CL_CONTEXT_NUM_DEVICES:
            PYOPENCL_GET_INTEGRAL_INFO(
                Context, m_context, param_name, cl_uint);
#endif

          default:
            throw error("Context.get_info", CL_INVALID_VALUE);
        }
      }
  };


  inline
  std::vector<cl_context_properties> parse_context_properties(
      py::object py_properties)
  {
    std::vector<cl_context_properties> props;

    if (py_properties.ptr() != Py_None)
    {
      for (py::handle prop_tuple_py: py_properties)
        py::tuple prop_tuple(prop_tuple_py.cast<py::tuple>());

        if (len(prop_tuple) != 2)
          throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2");
        cl_context_properties prop = prop_tuple[0].cast<cl_context_properties>();
        props.push_back(prop);

        if (prop == CL_CONTEXT_PLATFORM)
        {
          props.push_back(
              reinterpret_cast<cl_context_properties>(
                prop_tuple[1].cast<const platform &>().data()));
        }
#if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1)
#if defined(_WIN32)
       else if (prop == CL_WGL_HDC_KHR)
       {
         // size_t is a stand-in for HANDLE, hopefully has the same size.
         size_t hnd = (prop_tuple[1]).cast<size_t>();
         props.push_back(hnd);
       }
#endif
       else if (
#if defined(__APPLE__) && defined(HAVE_GL)
            prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE
#else
            prop == CL_GL_CONTEXT_KHR
            || prop == CL_EGL_DISPLAY_KHR
            || prop == CL_GLX_DISPLAY_KHR
            || prop == CL_CGL_SHAREGROUP_KHR
#endif
           )
       {
          py::object ctypes = py::module::import("ctypes");
          py::object prop = prop_tuple[1], c_void_p = ctypes.attr("c_void_p");
          py::object ptr = ctypes.attr("cast")(prop, c_void_p);
          props.push_back(ptr.attr("value").cast<cl_context_properties>());
       }
#endif
        else
          throw error("Context", CL_INVALID_VALUE, "invalid context property");
      }
      props.push_back(0);
    }

    return props;
  }


  inline
  context *create_context_inner(py::object py_devices, py::object py_properties,
      py::object py_dev_type)
  {
    std::vector<cl_context_properties> props
      = parse_context_properties(py_properties);

    cl_context_properties *props_ptr
Andreas Klöckner's avatar
Andreas Klöckner committed
      = props.empty( ) ? nullptr : &props.front();

    cl_int status_code;

    cl_context ctx;

    // from device list
    if (py_devices.ptr() != Py_None)
    {
      if (py_dev_type.ptr() != Py_None)
        throw error("Context", CL_INVALID_VALUE,
            "one of 'devices' or 'dev_type' must be None");

      std::vector<cl_device_id> devices;
Andreas Klöckner's avatar
Andreas Klöckner committed
      for (py::handle py_dev: py_devices)
        devices.push_back(py_dev.cast<const device &>().data());

      PYOPENCL_PRINT_CALL_TRACE("clCreateContext");
      ctx = clCreateContext(
          props_ptr,
          devices.size(),
Andreas Klöckner's avatar
Andreas Klöckner committed
          devices.empty( ) ? nullptr : &devices.front(),
          0, 0, &status_code);
    }
    // from dev_type
    else
    {
      cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
      if (py_dev_type.ptr() != Py_None)
        dev_type = py_dev_type.cast<cl_device_type>();

      PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType");
      ctx = clCreateContextFromType(props_ptr, dev_type, 0, 0, &status_code);
    }

    if (status_code != CL_SUCCESS)
      throw pyopencl::error("Context", status_code);

    try
    {
      return new context(ctx, false);
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED(clReleaseContext, (ctx));
      throw;
    }
  }


  inline
  context *create_context(py::object py_devices, py::object py_properties,
      py::object py_dev_type)
  {
    PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
      return create_context_inner(py_devices, py_properties, py_dev_type);
    )
  }

  // }}}

  // {{{ command_queue
  class command_queue
  {
    private:
      cl_command_queue m_queue;

    public:
      command_queue(cl_command_queue q, bool retain)
        : m_queue(q)
      {
        if (retain)
          PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q));
      }

      command_queue(command_queue const &src)
        : m_queue(src.m_queue)
      {
        PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue));
      }

      command_queue(
          const context &ctx,
          const device *py_dev=0,
          cl_command_queue_properties props=0)
      {
        cl_device_id dev;
        if (py_dev)
          dev = py_dev->data();
        else
        {
          std::vector<cl_device_id> devs;
          PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs);
          if (devs.size() == 0)
            throw pyopencl::error("CommandQueue", CL_INVALID_VALUE,
                "context doesn't have any devices? -- don't know which one to default to");
          dev = devs[0];
        }

        cl_int status_code;
        PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue");
        m_queue = clCreateCommandQueue(
            ctx.data(), dev, props, &status_code);

        if (status_code != CL_SUCCESS)
          throw pyopencl::error("CommandQueue", status_code);
      }

      ~command_queue()
      {
        PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue,
            (m_queue));
      }

      const cl_command_queue data() const
      { return m_queue; }

      PYOPENCL_EQUALITY_TESTS(command_queue);

      py::object get_info(cl_command_queue_info param_name) const
      {
        switch (param_name)
        {
          case CL_QUEUE_CONTEXT:
            PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name,
                cl_context, context);
          case CL_QUEUE_DEVICE:
            PYOPENCL_GET_OPAQUE_INFO(CommandQueue, m_queue, param_name,
                cl_device_id, device);
          case CL_QUEUE_REFERENCE_COUNT:
            PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name,
                cl_uint);
          case CL_QUEUE_PROPERTIES:
            PYOPENCL_GET_INTEGRAL_INFO(CommandQueue, m_queue, param_name,
                cl_command_queue_properties);

          default:
            throw error("CommandQueue.get_info", CL_INVALID_VALUE);
        }
      }

      std::unique_ptr<context> get_context() const
      {
        cl_context param_value;
        PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo,
            (m_queue, CL_QUEUE_CONTEXT, sizeof(param_value), &param_value, 0));
        return std::unique_ptr<context>(
            new context(param_value, /*retain*/ true));
      }

#if PYOPENCL_CL_VERSION < 0x1010
      cl_command_queue_properties set_property(
          cl_command_queue_properties prop,
          bool enable)
      {
        cl_command_queue_properties old_prop;
        PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty,
            (m_queue, prop, PYOPENCL_CAST_BOOL(enable), &old_prop));
        return old_prop;
      }
#endif

      void flush()
      { PYOPENCL_CALL_GUARDED(clFlush, (m_queue)); }
      void finish()
      { PYOPENCL_CALL_GUARDED_THREADED(clFinish, (m_queue)); }
  };

  // }}}

  // {{{ event/synchronization

  class event : noncopyable
  {
    private:
      cl_event m_event;

    public:
      event(cl_event event, bool retain)
        : m_event(event)
      {
        if (retain)
          PYOPENCL_CALL_GUARDED(clRetainEvent, (event));
      }

      event(event const &src)
        : m_event(src.m_event)
      { PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event)); }

      virtual ~event()
      {
        PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent,
            (m_event));
      }

      const cl_event data() const
      { return m_event; }

      PYOPENCL_EQUALITY_TESTS(event);

      py::object get_info(cl_event_info param_name) const
      {
        switch (param_name)
        {
          case CL_EVENT_COMMAND_QUEUE:
            PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name,
                cl_command_queue, command_queue);
          case CL_EVENT_COMMAND_TYPE:
            PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name,
                cl_command_type);
          case CL_EVENT_COMMAND_EXECUTION_STATUS:
            PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name,
                cl_int);
          case CL_EVENT_REFERENCE_COUNT:
            PYOPENCL_GET_INTEGRAL_INFO(Event, m_event, param_name,
                cl_uint);
#if PYOPENCL_CL_VERSION >= 0x1010
          case CL_EVENT_CONTEXT:
            PYOPENCL_GET_OPAQUE_INFO(Event, m_event, param_name,
                cl_context, context);
#endif

          default:
            throw error("Event.get_info", CL_INVALID_VALUE);
        }
      }

      py::object get_profiling_info(cl_profiling_info param_name) const
      {
        switch (param_name)
        {
          case CL_PROFILING_COMMAND_QUEUED:
          case CL_PROFILING_COMMAND_SUBMIT:
          case CL_PROFILING_COMMAND_START:
          case CL_PROFILING_COMMAND_END:
Andreas Klöckner's avatar
Andreas Klöckner committed
#if PYOPENCL_CL_VERSION >= 0x2000
          case CL_PROFILING_COMMAND_COMPLETE:
#endif
            PYOPENCL_GET_INTEGRAL_INFO(EventProfiling, m_event, param_name,
                cl_ulong);
          default:
            throw error("Event.get_profiling_info", CL_INVALID_VALUE);
        }
      }

      virtual void wait()
      {
        PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event));
      }
  };

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
  class nanny_event : public event
  {
    // In addition to everything an event does, the nanny event holds a reference
    // to a Python object and waits for its own completion upon destruction.

    protected:
      std::unique_ptr<py_buffer_wrapper> m_ward;
      nanny_event(cl_event evt, bool retain, std::unique_ptr<py_buffer_wrapper> &ward)
        : event(evt, retain), m_ward(std::move(ward))
      { }

      ~nanny_event()
      { wait(); }

      py::object get_ward() const
      {
        if (m_ward.get())
        {
          return py::reinterpret_borrow<py::object>(m_ward->m_buf.obj);
          return py::none();
      }

      virtual void wait()
      {
        event::wait();
        m_ward.reset();
      }
  };
#else
  class nanny_event : public event
  {
    // In addition to everything an event does, the nanny event holds a reference
    // to a Python object and waits for its own completion upon destruction.

    protected:
      py::object        m_ward;

    public:

      nanny_event(cl_event evt, bool retain, py::object ward)
        : event(evt, retain), m_ward(ward)
      { }

      nanny_event(nanny_event const &src)
        : event(src), m_ward(src.m_ward)
      { }

      ~nanny_event()
      { wait(); }

      py::object get_ward() const
      { return m_ward; }

      virtual void wait()
      {
        event::wait();
        m_ward = py::none();
      }
  };
#endif




  inline
  void wait_for_events(py::object events)
  {
    cl_uint num_events_in_wait_list = 0;
    std::vector<cl_event> event_wait_list(len(events));

    for (py::handle evt: events)
      event_wait_list[num_events_in_wait_list++] =
        evt.cast<event &>().data();

    PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (
          PYOPENCL_WAITLIST_ARGS));
  }




#if PYOPENCL_CL_VERSION >= 0x1020
  inline
  event *enqueue_marker_with_wait_list(command_queue &cq,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    cl_event evt;

    PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, (
          cq.data(), PYOPENCL_WAITLIST_ARGS, &evt));

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

  inline
  event *enqueue_barrier_with_wait_list(command_queue &cq,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    cl_event evt;

    PYOPENCL_CALL_GUARDED(clEnqueueBarrierWithWaitList,
        (cq.data(), PYOPENCL_WAITLIST_ARGS, &evt));

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif


  // {{{ used internally for pre-OpenCL-1.2 contexts

  inline
  event *enqueue_marker(command_queue &cq)
  {
    cl_event evt;

    PYOPENCL_CALL_GUARDED(clEnqueueMarker, (
          cq.data(), &evt));

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

  inline
  void enqueue_wait_for_events(command_queue &cq, py::object py_events)
  {
    cl_uint num_events = 0;
    std::vector<cl_event> event_list(len(py_events));

    for (py::handle py_evt: py_events)
      event_list[num_events++] = py_evt.cast<event &>().data();

    PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (
Andreas Klöckner's avatar
Andreas Klöckner committed
          cq.data(), num_events, event_list.empty( ) ? nullptr : &event_list.front()));
  }

  inline
  void enqueue_barrier(command_queue &cq)
  {
    PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data()));
  }

  // }}}


#if PYOPENCL_CL_VERSION >= 0x1010
  class user_event : public event
  {
    public:
      user_event(cl_event evt, bool retain)
        : event(evt, retain)
      { }

      void set_status(cl_int execution_status)
      {
        PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status));
      }
  };




  inline
  user_event *create_user_event(context &ctx)
  {
    cl_int status_code;
    PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent");
    cl_event evt = clCreateUserEvent(ctx.data(), &status_code);

    if (status_code != CL_SUCCESS)
      throw pyopencl::error("UserEvent", status_code);

    try
    {
      return new user_event(evt, false);
    }
    catch (...)
    {
      clReleaseEvent(evt);
      throw;
    }
  }

#endif

  // }}}

  // {{{ memory_object

  py::object create_mem_object_wrapper(cl_mem mem);

  class memory_object_holder
  {
    public:
      virtual const cl_mem data() const = 0;

      PYOPENCL_EQUALITY_TESTS(memory_object_holder);

      size_t size() const
      {
        size_t param_value;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (data(), CL_MEM_SIZE, sizeof(param_value), &param_value, 0));
        return param_value;
      }

      py::object get_info(cl_mem_info param_name) const;
  };




  class memory_object : noncopyable, public memory_object_holder
  {
    public:
#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
      typedef std::unique_ptr<py_buffer_wrapper> hostbuf_t;
#else
      typedef py::object hostbuf_t;
#endif

    private:
      bool m_valid;
      cl_mem m_mem;
      hostbuf_t m_hostbuf;

    public:
      memory_object(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : m_valid(true), m_mem(mem)
      {
        if (retain)
          PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem));

        m_hostbuf = PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf);
      }

      memory_object(memory_object &src)
        : m_valid(true), m_mem(src.m_mem),
        m_hostbuf(PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(src.m_hostbuf))
      {
        PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem));
      }

      memory_object(memory_object_holder const &src)
        : m_valid(true), m_mem(src.data())
      {
        PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem));
      }

      void release()
      {
        if (!m_valid)
            throw error("MemoryObject.free", CL_INVALID_VALUE,
                "trying to double-unref mem object");
        PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem));
        m_valid = false;
      }

      virtual ~memory_object()
      {
        if (m_valid)
          release();
      }

      py::object hostbuf()
      {
#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
        if (m_hostbuf.get())
          return py::reinterpret_borrow<py::object>(m_hostbuf->m_buf.obj);
          return py::none();
#else
        return m_hostbuf;
#endif
      }

      const cl_mem data() const
      { return m_mem; }

  };

#if PYOPENCL_CL_VERSION >= 0x1020
  inline
  event *enqueue_migrate_mem_objects(
      command_queue &cq,
      py::object py_mem_objects,
      cl_mem_migration_flags flags,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    std::vector<cl_mem> mem_objects;
    for (py::handle mo: py_mem_objects)
      mem_objects.push_back(mo.cast<const memory_object &>().data());

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, (
            cq.data(),
Andreas Klöckner's avatar
Andreas Klöckner committed
            mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(),
            flags,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif

#ifdef cl_ext_migrate_memobject
  inline
  event *enqueue_migrate_mem_object_ext(
      command_queue &cq,
      py::object py_mem_objects,
      cl_mem_migration_flags_ext flags,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;

#if PYOPENCL_CL_VERSION >= 0x1020
    // {{{ get platform
    cl_device_id dev;
    PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo, (cq.data(), CL_QUEUE_DEVICE,
Andreas Klöckner's avatar
Andreas Klöckner committed
          sizeof(dev), &dev, nullptr));
    cl_platform_id plat;
    PYOPENCL_CALL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_PLATFORM,
Andreas Klöckner's avatar
Andreas Klöckner committed
          sizeof(plat), &plat, nullptr));
    // }}}
#endif

    PYOPENCL_GET_EXT_FUN(plat,
        clEnqueueMigrateMemObjectEXT, enqueue_migrate_fn);

    std::vector<cl_mem> mem_objects;
    for (py::handle mo: py_mem_objects)
      mem_objects.push_back(mo.cast<memory_object &>().data());

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(enqueue_migrate_fn, (
            cq.data(),
Andreas Klöckner's avatar
Andreas Klöckner committed
            mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(),
            flags,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif

  // }}}

  // {{{ buffer

  inline cl_mem create_buffer(
      cl_context ctx,
      cl_mem_flags flags,
      size_t size,
      void *host_ptr)
  {
    cl_int status_code;
    PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer");
    cl_mem mem = clCreateBuffer(ctx, flags, size, host_ptr, &status_code);

    if (status_code != CL_SUCCESS)
      throw pyopencl::error("create_buffer", status_code);

    return mem;
  }




  inline cl_mem create_buffer_gc(
      cl_context ctx,
      cl_mem_flags flags,
      size_t size,
      void *host_ptr)
  {
    PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
      return create_buffer(ctx, flags, size, host_ptr);
    );
  }



#if PYOPENCL_CL_VERSION >= 0x1010
  inline cl_mem create_sub_buffer(
      cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct,
      const void *buffer_create_info)
  {
    cl_int status_code;
    PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer");
    cl_mem mem = clCreateSubBuffer(buffer, flags,
        bct, buffer_create_info, &status_code);

    if (status_code != CL_SUCCESS)
      throw pyopencl::error("clCreateSubBuffer", status_code);

    return mem;
  }




  inline cl_mem create_sub_buffer_gc(
      cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct,
      const void *buffer_create_info)
  {
    PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
      return create_sub_buffer(buffer, flags, bct, buffer_create_info);
    );
  }
#endif



  class buffer : public memory_object
  {
    public:
      buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
      { }

#if PYOPENCL_CL_VERSION >= 0x1010
      buffer *get_sub_region(
          size_t origin, size_t size, cl_mem_flags flags) const
      {
        cl_buffer_region region = { origin, size};

        cl_mem mem = create_sub_buffer_gc(
            data(), flags, CL_BUFFER_CREATE_TYPE_REGION, &region);

        try
        {
          return new buffer(mem, false);
        }
        catch (...)
        {
          PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
          throw;
        }
      }

      buffer *getitem(py::slice slc) const
      {
        PYOPENCL_BUFFER_SIZE_T start, end, stride, length;

        size_t my_length;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0));

#if PY_VERSION_HEX >= 0x03020000
        if (PySlice_GetIndicesEx(slc.ptr(),
#else
        if (PySlice_GetIndicesEx(reinterpret_cast<PySliceObject *>(slc.ptr()),
#endif
              my_length, &start, &end, &stride, &length) != 0)
          throw py::error_already_set();

        if (stride != 1)
          throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE,
              "Buffer slice must have stride 1");

        cl_mem_flags my_flags;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0));

        my_flags &= ~CL_MEM_COPY_HOST_PTR;

        if (end <= start)
          throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE,
              "Buffer slice have end > start");

        return get_sub_region(start, end-start, my_flags);
      }
#endif
  };

  // {{{ buffer creation

  inline
  buffer *create_buffer_py(
      context &ctx,
      cl_mem_flags flags,
      size_t size,
      py::object py_hostbuf
      )
  {
    if (py_hostbuf.ptr() != Py_None &&
        !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
      PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, "
          "but no memory flags to make use of it.");

    void *buf = 0;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> retained_buf_obj;
    if (py_hostbuf.ptr() != Py_None)
    {
      retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper);

      int py_buf_flags = PyBUF_ANY_CONTIGUOUS;
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
        py_buf_flags |= PyBUF_WRITABLE;

      retained_buf_obj->get(py_hostbuf.ptr(), py_buf_flags);

      buf = retained_buf_obj->m_buf.buf;

      if (size > size_t(retained_buf_obj->m_buf.len))
        throw pyopencl::error("Buffer", CL_INVALID_VALUE,
            "specified size is greater than host buffer size");
      if (size == 0)
        size = retained_buf_obj->m_buf.len;
    }
#else
    py::object retained_buf_obj;
    if (py_hostbuf.ptr() != Py_None)
    {
      PYOPENCL_BUFFER_SIZE_T len;
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
      {
        if (PyObject_AsWriteBuffer(py_hostbuf.ptr(), &buf, &len))
          throw py::error_already_set();
      }
      else
      {
        if (PyObject_AsReadBuffer(
              py_hostbuf.ptr(), const_cast<const void **>(&buf), &len))
          throw py::error_already_set();
      }

      if (flags & CL_MEM_USE_HOST_PTR)
        retained_buf_obj = py_hostbuf;

      if (size > size_t(len))
        throw pyopencl::error("Buffer", CL_INVALID_VALUE,
            "specified size is greater than host buffer size");
      if (size == 0)
        size = len;
    }
#endif

    cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf);

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    if (!(flags & CL_MEM_USE_HOST_PTR))
      retained_buf_obj.reset();
#endif

    try
    {
      return new buffer(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj));
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
      throw;
    }
  }

  // }}}

  // {{{ buffer transfers

  // {{{ byte-for-byte transfers

  inline
  event *enqueue_read_buffer(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      size_t device_offset,
      py::object py_wait_for,
      bool is_blocking)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    void *buf;
    PYOPENCL_BUFFER_SIZE_T len;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

    buf = ward->m_buf.buf;
    len = ward->m_buf.len;
#else
    py::object ward = buffer;
    if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            device_offset, len, buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_write_buffer(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      size_t device_offset,
      py::object py_wait_for,
      bool is_blocking)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    const void *buf;
    PYOPENCL_BUFFER_SIZE_T len;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

    buf = ward->m_buf.buf;
    len = ward->m_buf.len;
#else
    py::object ward = buffer;
    if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            device_offset, len, buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_copy_buffer(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dst,
      ptrdiff_t byte_count,
      size_t src_offset,
      size_t dst_offset,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    if (byte_count < 0)
    {
      size_t byte_count_src = 0;
      size_t byte_count_dst = 0;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0));
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0));
      byte_count = std::min(byte_count_src, byte_count_dst);
    }

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, (
            cq.data(),
            src.data(), dst.data(),
            src_offset, dst_offset,
            byte_count,
            PYOPENCL_WAITLIST_ARGS,
            &evt
            ))
      );

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

  // }}}

  // {{{ rectangular transfers
#if PYOPENCL_CL_VERSION >= 0x1010
  inline
  event *enqueue_read_buffer_rect(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      py::object py_buffer_origin,
      py::object py_host_origin,
      py::object py_region,
      py::sequence py_buffer_pitches,
      py::sequence py_host_pitches,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(buffer_origin);
    COPY_PY_COORD_TRIPLE(host_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(buffer_pitches);
    COPY_PY_PITCH_TUPLE(host_pitches);

    void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;

    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            buffer_origin, host_origin, region,
            buffer_pitches[0], buffer_pitches[1],
            host_pitches[0], host_pitches[1],
            buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_write_buffer_rect(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      py::object py_buffer_origin,
      py::object py_host_origin,
      py::object py_region,
      py::sequence py_buffer_pitches,
      py::sequence py_host_pitches,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(buffer_origin);
    COPY_PY_COORD_TRIPLE(host_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(buffer_pitches);
    COPY_PY_PITCH_TUPLE(host_pitches);

    const void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;
    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            buffer_origin, host_origin, region,
            buffer_pitches[0], buffer_pitches[1],
            host_pitches[0], host_pitches[1],
            buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_copy_buffer_rect(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dst,
      py::object py_src_origin,
      py::object py_dst_origin,
      py::object py_region,
      py::sequence py_src_pitches,
      py::sequence py_dst_pitches,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(src_origin);
    COPY_PY_COORD_TRIPLE(dst_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(src_pitches);
    COPY_PY_PITCH_TUPLE(dst_pitches);

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, (
            cq.data(),
            src.data(), dst.data(),
            src_origin, dst_origin, region,
            src_pitches[0], src_pitches[1],
            dst_pitches[0], dst_pitches[1],
            PYOPENCL_WAITLIST_ARGS,
            &evt
            ))
      );

    PYOPENCL_RETURN_NEW_EVENT(evt);
Loading
Loading full blame...