Skip to content
Snippets Groups Projects
wrap_cl.hpp 162 KiB
Newer Older
  • Learn to ignore specific revisions
  •           case CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION:
              case CL_DEVICE_OPENCL_C_ALL_VERSIONS:
              case CL_DEVICE_OPENCL_C_FEATURES:
                {
                  std::vector<cl_name_version> result;
                  PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
                  PYOPENCL_RETURN_VECTOR(cl_name_version, result);
                }
              case CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES: DEV_GET_INT_INF(cl_device_atomic_capabilities);
              case CL_DEVICE_ATOMIC_FENCE_CAPABILITIES: DEV_GET_INT_INF(cl_device_atomic_capabilities);
              case CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT: DEV_GET_INT_INF(cl_bool);
              case CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: DEV_GET_INT_INF(size_t);
              case CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT: DEV_GET_INT_INF(cl_bool);
              case CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT: DEV_GET_INT_INF(cl_bool);
    
              case CL_DEVICE_DEVICE_ENQUEUE_SUPPORT: DEV_GET_INT_INF(cl_bool);
    
    #endif
    #ifdef CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES
    
              case CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES: DEV_GET_INT_INF(cl_device_device_enqueue_capabilities);
    
              case CL_DEVICE_PIPE_SUPPORT: DEV_GET_INT_INF(cl_bool);
    #endif
    
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
    #ifdef CL_DEVICE_ME_VERSION_INTEL
              case CL_DEVICE_ME_VERSION_INTEL: DEV_GET_INT_INF(cl_uint);
    #endif
    #ifdef CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM
              case CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM: DEV_GET_INT_INF(cl_uint);
    #endif
    #ifdef CL_DEVICE_PAGE_SIZE_QCOM
              case CL_DEVICE_PAGE_SIZE_QCOM: DEV_GET_INT_INF(cl_uint);
    #endif
    #ifdef CL_DEVICE_SPIR_VERSIONS
              case CL_DEVICE_SPIR_VERSIONS:
                PYOPENCL_GET_STR_INFO(Device, m_device, param_name);
    #endif
    #ifdef CL_DEVICE_CORE_TEMPERATURE_ALTERA
              case CL_DEVICE_CORE_TEMPERATURE_ALTERA: DEV_GET_INT_INF(cl_int);
    #endif
    
    #ifdef CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL
              case CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL:
                {
                  std::vector<cl_uint> result;
                  PYOPENCL_GET_VEC_INFO(Device, m_device, param_name, result);
                  PYOPENCL_RETURN_VECTOR(cl_uint, result);
                }
    #endif
    #ifdef CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL
              case CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL: DEV_GET_INT_INF(cl_uint);
    #endif
    
    
              default:
                throw error("Device.get_info", CL_INVALID_VALUE);
            }
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          // Partition this device with clCreateSubDevices and return the new
          // sub-devices as a py::list of wrapped device objects.
          //
          // The partition properties are copied out of py_properties by
          // COPY_PY_LIST (presumably reading the py_properties argument by
          // name -- confirm against the macro); the terminating 0 entry is
          // appended here.
          py::list create_sub_devices(py::object py_properties)
          {
            std::vector<cl_device_partition_property> properties;

            COPY_PY_LIST(cl_device_partition_property, properties);
            properties.push_back(0);

            cl_device_partition_property *props_ptr
              = properties.empty( ) ? nullptr : &properties.front();

            // First call only asks how many sub-devices would be produced.
            cl_uint num_entries;
            PYOPENCL_CALL_GUARDED(clCreateSubDevices,
                (m_device, props_ptr, 0, nullptr, &num_entries));

            std::vector<cl_device_id> result;
            result.resize(num_entries);

            // front() on an empty vector is undefined, so pass nullptr when
            // the count query reported zero sub-devices.
            PYOPENCL_CALL_GUARDED(clCreateSubDevices,
                (m_device, props_ptr, num_entries,
                 result.empty( ) ? nullptr : &result.front(), nullptr));

            py::list py_result;
            for (cl_device_id did: result)
              py_result.append(handle_from_new_ptr(
                    new pyopencl::device(did, /*retain*/true,
                      device::REF_CL_1_2)));
            return py_result;
          }
    #endif
    
    
    #if PYOPENCL_CL_VERSION >= 0x2010
          // Calls clGetDeviceAndHostTimer for this device and returns the two
          // readings as a (device_timestamp, host_timestamp) tuple.
          py::tuple device_and_host_timer() const
          {
            cl_ulong dev_ts;
            cl_ulong host_ts;
            PYOPENCL_CALL_GUARDED(
                clGetDeviceAndHostTimer, (m_device, &dev_ts, &host_ts));

            return py::make_tuple(dev_ts, host_ts);
          }
    
          // Returns the host timestamp obtained from clGetHostTimer for this
          // device.
          cl_ulong host_timer() const
          {
            cl_ulong ts;
            PYOPENCL_CALL_GUARDED(clGetHostTimer, (m_device, &ts));
            return ts;
          }
    #endif
    
      };
    
    
    
    
      // Returns the devices of type `devtype` on this platform as a py::list
      // of wrapped device objects.
      //
      // A CL_DEVICE_NOT_FOUND status from the count query is treated as "no
      // devices" and produces an empty list; any other non-success status
      // raises pyopencl::error.
      inline py::list platform::get_devices(cl_device_type devtype)
      {
        cl_uint num_devices = 0;
        PYOPENCL_PRINT_CALL_TRACE("clGetDeviceIDs");
        {
          cl_int status_code;
          status_code = clGetDeviceIDs(m_platform, devtype, 0, nullptr, &num_devices);
          if (status_code == CL_DEVICE_NOT_FOUND)
            num_devices = 0;
          else if (status_code != CL_SUCCESS)
            throw pyopencl::error("clGetDeviceIDs", status_code);
        }

        if (num_devices == 0)
          return py::list();

        std::vector<cl_device_id> devices(num_devices);
        PYOPENCL_CALL_GUARDED(clGetDeviceIDs,
            (m_platform, devtype,
             num_devices, devices.empty( ) ? nullptr : &devices.front(), &num_devices));

        // The second query rewrites num_devices; if fewer devices were
        // reported this time, drop the unfilled trailing slots.
        if (num_devices < devices.size())
          devices.resize(num_devices);

        py::list result;
        for (cl_device_id did: devices)
          result.append(handle_from_new_ptr(
                new device(did)));

        return result;
      }
    
      // }}}
    
    
      // {{{ context
    
    
      // Owning handle for a cl_context. The destructor always releases one
      // reference on the wrapped context.
      class context : public noncopyable
      {
        private:
          cl_context m_context;

        public:
          // Wraps `ctx`. With retain=true, clRetainContext is called so the
          // release in the destructor is balanced; with retain=false the
          // reference the caller already holds is taken over.
          context(cl_context ctx, bool retain)
            : m_context(ctx)
          {
            if (retain)
              PYOPENCL_CALL_GUARDED(clRetainContext, (ctx));
          }

          ~context()
          {
            PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseContext,
                (m_context));
          }

          // Raw OpenCL handle; no reference is added.
          cl_context data() const
          {
            return m_context;
          }

          PYOPENCL_EQUALITY_TESTS(context);

          // Looks up a context info parameter and converts it to a Python
          // object. Unhandled param_name values raise
          // error(CL_INVALID_VALUE).
          //
          // NOTE(review): the PYOPENCL_GET_TYPED_INFO cases have no break;
          // the macro presumably returns from the function -- confirm.
          py::object get_info(cl_context_info param_name) const
          {
            switch (param_name)
            {
              case CL_CONTEXT_REFERENCE_COUNT:
                PYOPENCL_GET_TYPED_INFO(
                    Context, m_context, param_name, cl_uint);

              case CL_CONTEXT_DEVICES:
                {
                  std::vector<cl_device_id> result;
                  PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result);

                  py::list py_result;
                  for (cl_device_id did: result)
                    py_result.append(handle_from_new_ptr(
                          new pyopencl::device(did)));
                  return py_result;
                }

              case CL_CONTEXT_PROPERTIES:
                {
                  std::vector<cl_context_properties> result;
                  PYOPENCL_GET_VEC_INFO(Context, m_context, param_name, result);

                  // The vector is walked two entries at a time: key, value.
                  py::list py_result;
                  for (size_t i = 0; i < result.size(); i+=2)
                  {
                    cl_context_properties key = result[i];
                    // Guards the result[i+1] reads below against a list
                    // that ends right after a key.
                    const bool has_value = i + 1 < result.size();
                    py::object value;
                    switch (key)
                    {
                      case CL_CONTEXT_PLATFORM:
                        {
                          if (!has_value)
                            throw error("Context.get_info", CL_INVALID_VALUE,
                                "context property key is missing its value");
                          value = py::object(
                              handle_from_new_ptr(new platform(
                                reinterpret_cast<cl_platform_id>(result[i+1]))));
                          break;
                        }

    #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1)
    #if defined(__APPLE__) && defined(HAVE_GL)
                      case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE:
    #else
                      case CL_GL_CONTEXT_KHR:
                      case CL_EGL_DISPLAY_KHR:
                      case CL_GLX_DISPLAY_KHR:
                      case CL_WGL_HDC_KHR:
                      case CL_CGL_SHAREGROUP_KHR:
    #endif
                        if (!has_value)
                          throw error("Context.get_info", CL_INVALID_VALUE,
                              "context property key is missing its value");
                        value = py::cast(result[i+1]);
                        break;

    #endif
                      case 0:
                        // Terminator entry: no value is read for it.
                        break;

                      default:
                        throw error("Context.get_info", CL_INVALID_VALUE,
                            "unknown context_property key encountered");
                    }

                    py_result.append(py::make_tuple(result[i], value));
                  }
                  return py_result;
                }

    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_CONTEXT_NUM_DEVICES:
                PYOPENCL_GET_TYPED_INFO(
                    Context, m_context, param_name, cl_uint);
    #endif

              default:
                throw error("Context.get_info", CL_INVALID_VALUE);
            }
          }


          // not exposed to python
          //
          // Parses the CL_PLATFORM_VERSION string ("OpenCL <major>.<minor> ...")
          // of the platform owning the context's first device and returns it
          // encoded as major << 12 | minor << 4 (so 1.2 becomes 0x1020).
          // Raises error(CL_INVALID_VALUE) if the context reports no devices
          // or the string does not match.
          int get_hex_platform_version() const
          {
            std::vector<cl_device_id> devices;
            PYOPENCL_GET_VEC_INFO(Context, m_context, CL_CONTEXT_DEVICES, devices);

            if (devices.size() == 0)
              throw error("Context._get_hex_platform_version", CL_INVALID_VALUE,
                  "context has no devices");

            cl_platform_id plat;

            PYOPENCL_CALL_GUARDED(clGetDeviceInfo,
                (devices[0], CL_DEVICE_PLATFORM, sizeof(plat), &plat, nullptr));

            std::string plat_version;
            {
              // Size query first, then fetch into a buffer of that size.
              size_t param_value_size;
              PYOPENCL_CALL_GUARDED(clGetPlatformInfo,
                  (plat, CL_PLATFORM_VERSION, 0, nullptr, &param_value_size));

              std::vector<char> param_value(param_value_size);
              PYOPENCL_CALL_GUARDED(clGetPlatformInfo,
                  (plat, CL_PLATFORM_VERSION, param_value_size,
                   param_value.empty( ) ? nullptr : &param_value.front(), &param_value_size));

              // The last reported byte is left out of the string.
              plat_version =
                  param_value.empty( ) ? "" : std::string(&param_value.front(), param_value_size-1);
            }

            int major_ver, minor_ver;
            errno = 0;
            int match_count = sscanf(plat_version.c_str(), "OpenCL %d.%d ", &major_ver, &minor_ver);
            if (errno || match_count != 2)
              throw error("Context._get_hex_platform_version", CL_INVALID_VALUE,
                  "Platform version string did not have expected format");

            return major_ver << 12 | minor_ver << 4;
          }


    #if PYOPENCL_CL_VERSION >= 0x2010
          void set_default_device_command_queue(device const &dev, command_queue const &queue);
    #endif

      };
    
    
      // Converts a Python iterable of (key, value) tuples into a flat,
      // zero-terminated cl_context_properties list.
      //
      // Passing None yields an empty vector (no terminator). Tuples whose
      // length is not 2, and keys that are not handled below, raise
      // error(CL_INVALID_VALUE).
      inline
      std::vector<cl_context_properties> parse_context_properties(
          py::object py_properties)
      {
        std::vector<cl_context_properties> props;

        if (py_properties.ptr() != Py_None)
        {
          for (py::handle prop_tuple_py: py_properties)
          {
            py::tuple prop_tuple(py::cast<py::tuple>(prop_tuple_py));

            if (len(prop_tuple) != 2)
              throw error("Context", CL_INVALID_VALUE, "property tuple must have length 2");

            cl_context_properties prop = py::cast<cl_context_properties>(prop_tuple[0]);
            props.push_back(prop);

            if (prop == CL_CONTEXT_PLATFORM)
            {
              // The value is a wrapped platform; store its raw handle.
              props.push_back(
                  reinterpret_cast<cl_context_properties>(
                    py::cast<const platform &>(prop_tuple[1]).data()));
            }
    #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1)
    #if defined(_WIN32)
            else if (prop == CL_WGL_HDC_KHR)
            {
              // size_t is a stand-in for HANDLE, hopefully has the same size.
              size_t hnd = py::cast<size_t>(prop_tuple[1]);
              props.push_back(hnd);
            }
    #endif
            else if (
    #if defined(__APPLE__) && defined(HAVE_GL)
                prop == CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE
    #else
                prop == CL_GL_CONTEXT_KHR
                || prop == CL_EGL_DISPLAY_KHR
                || prop == CL_GLX_DISPLAY_KHR
                || prop == CL_CGL_SHAREGROUP_KHR
    #endif
               )
            {
              // The value is run through ctypes.cast(value, c_void_p) and
              // the resulting integer address is stored.
              py::object ctypes = py::module_::import_("ctypes");
              py::object py_value = prop_tuple[1], c_void_p = ctypes.attr("c_void_p");
              py::object ptr = ctypes.attr("cast")(py_value, c_void_p);

              props.push_back(py::cast<cl_context_properties>(ptr.attr("value")));
            }
    #endif
            else
              throw error("Context", CL_INVALID_VALUE, "invalid context property");
          }
          props.push_back(0);
        }

        return props;
      }
    
    
      // Creates an OpenCL context and constructs a `context` wrapper in the
      // storage pointed to by `self` (placement new).
      //
      // Exactly one of py_devices / py_dev_type may be non-None:
      //  - py_devices given: clCreateContext on the listed devices.
      //  - otherwise: clCreateContextFromType, using CL_DEVICE_TYPE_DEFAULT
      //    when py_dev_type is None.
      // A non-success status raises pyopencl::error("Context", status).
      inline
      void create_context_inner(context *self, py::object py_devices, py::object py_properties,
          py::object py_dev_type)
      {
        std::vector<cl_context_properties> props
          = parse_context_properties(py_properties);

        cl_context_properties *props_ptr
          = props.empty( ) ? nullptr : &props.front();

        cl_int status_code;

        cl_context ctx;

        // from device list
        if (py_devices.ptr() != Py_None)
        {
          if (py_dev_type.ptr() != Py_None)
            throw error("Context", CL_INVALID_VALUE,
                "one of 'devices' or 'dev_type' must be None");

          std::vector<cl_device_id> devices;
          for (py::handle py_dev: py_devices)
            devices.push_back(py::cast<const device &>(py_dev).data());

          PYOPENCL_PRINT_CALL_TRACE("clCreateContext");
          ctx = clCreateContext(
              props_ptr,
              devices.size(),
              devices.empty( ) ? nullptr : &devices.front(),
              nullptr, nullptr, &status_code);
        }
        // from dev_type
        else
        {
          cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;
          if (py_dev_type.ptr() != Py_None)
            dev_type = py::cast<cl_device_type>(py_dev_type);

          PYOPENCL_PRINT_CALL_TRACE("clCreateContextFromType");
          ctx = clCreateContextFromType(props_ptr, dev_type, nullptr, nullptr, &status_code);
        }

        if (status_code != CL_SUCCESS)
          throw pyopencl::error("Context", status_code);

        try
        {
          // retain=false: the wrapper takes over the reference returned by
          // the create call above.
          new (self) context(ctx, false);
        }
        catch (...)
        {
          // Construction failed, so nobody owns ctx yet; release it here.
          PYOPENCL_CALL_GUARDED(clReleaseContext, (ctx));
          throw;
        }
      }
    
    
      // Entry point for context construction: runs create_context_inner
      // through PYOPENCL_RETRY_RETURN_IF_MEM_ERROR (presumably a retry on
      // out-of-memory errors, going by the macro name -- confirm against its
      // definition).
      inline
      void create_context(context *self, py::object py_devices, py::object py_properties,
          py::object py_dev_type)
      {
        PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
          create_context_inner(self, py_devices, py_properties, py_dev_type);
        )
      }
    
      // {{{ command_queue
    
      class command_queue
      {
        private:
          cl_command_queue m_queue;
    
          // m_finalized==True indicates that this command queue should no longer
          // be used. An example of this is if a command queue is used as a context
          // manager, after the 'with' block exits.
          //
          // This mechanism is not foolproof, as it is perfectly possible to create
          // other Python proxy objects referring to the same underlying
          // cl_command_queue. Even so, this ought to flag a class of potentially
          // very damaging synchronization bugs.
          bool m_finalized;
    
    
        public:
          // Wraps an existing queue handle. An extra reference is taken via
          // clRetainCommandQueue only when `retain` is set. The wrapper
          // starts out not finalized.
          command_queue(cl_command_queue q, bool retain)
            : m_queue(q),
              m_finalized(false)
          {
            if (!retain)
              return;

            PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (q));
          }
    
          // Copy: shares src's queue handle and takes an additional
          // reference on it. The finalized flag is not copied; the new
          // wrapper starts out not finalized.
          command_queue(command_queue const &src)
            : m_queue(src.m_queue),
              m_finalized(false)
          { PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue)); }
    
          command_queue(
              const context &ctx,
    
              const device *py_dev=nullptr,
              py::object py_props=py::none())
    
          {
            cl_device_id dev;
            if (py_dev)
              dev = py_dev->data();
            else
            {
              std::vector<cl_device_id> devs;
              PYOPENCL_GET_VEC_INFO(Context, ctx.data(), CL_CONTEXT_DEVICES, devs);
              if (devs.size() == 0)
                throw pyopencl::error("CommandQueue", CL_INVALID_VALUE,
                    "context doesn't have any devices? -- don't know which one to default to");
              dev = devs[0];
            }
    
    
            int hex_plat_version = ctx.get_hex_platform_version();
    
            bool props_given_as_numeric;
            cl_command_queue_properties num_props;
            if (py_props.is_none())
            {
              num_props = 0;
              props_given_as_numeric = true;
            }
            else
            {
              try
              {
                num_props = py::cast<cl_command_queue_properties>(py_props);
                props_given_as_numeric = true;
              }
              catch (py::cast_error &)
              {
                props_given_as_numeric = false;
              }
            }
    
            if (props_given_as_numeric)
            {
    #if PYOPENCL_CL_VERSION >= 0x2000
              if (hex_plat_version  >= 0x2000)
              {
                cl_queue_properties props_list[] = { CL_QUEUE_PROPERTIES, num_props, 0 };
    
                cl_int status_code;
    
                PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties");
                m_queue = clCreateCommandQueueWithProperties(
                    ctx.data(), dev, props_list, &status_code);
    
                if (status_code != CL_SUCCESS)
                  throw pyopencl::error("CommandQueue", status_code);
              }
              else
    #endif
              {
                cl_int status_code;
    
                PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueue");
    
    #if defined(__GNUG__) && !defined(__clang__)
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    #endif
    
                m_queue = clCreateCommandQueue(
                    ctx.data(), dev, num_props, &status_code);
    
    #if defined(__GNUG__) && !defined(__clang__)
    #pragma GCC diagnostic pop
    #endif
    
                if (status_code != CL_SUCCESS)
                  throw pyopencl::error("CommandQueue", status_code);
              }
            }
            else
            {
    
    #if PYOPENCL_CL_VERSION < 0x2000
    
                throw error("CommandQueue", CL_INVALID_VALUE,
                    "queue properties given as an iterable, "
                    "which is only allowed when PyOpenCL was built "
                    "against an OpenCL 2+ header");
    
              if (hex_plat_version  < 0x2000)
              {
                std::cerr <<
                    "queue properties given as an iterable, "
                    "which uses an OpenCL 2+-only interface, "
                    "but the context's platform does not "
                    "declare OpenCL 2 support. Proceeding "
    
                    "as requested, but the next thing you see "
    
              PYOPENCL_STACK_CONTAINER(cl_queue_properties, props, py::len(py_props) + 1);
    
              {
                size_t i = 0;
                for (auto prop: py_props)
                  props[i++] = py::cast<cl_queue_properties>(prop);
                props[i++] = 0;
              }
    
              cl_int status_code;
              PYOPENCL_PRINT_CALL_TRACE("clCreateCommandQueueWithProperties");
              m_queue = clCreateCommandQueueWithProperties(
    
                  ctx.data(), dev, PYOPENCL_STACK_CONTAINER_GET_PTR(props), &status_code);
    
              if (status_code != CL_SUCCESS)
                throw pyopencl::error("CommandQueue", status_code);
    
          }
    
          // Gives up this wrapper's reference on the queue, using the
          // cleanup variant of the guarded-call macro.
          ~command_queue()
          { PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, (m_queue)); }
    
          // Returns the raw queue handle (no reference is added).
          //
          // If finalize() has been called on this wrapper, a
          // pyopencl.CommandQueueUsedAfterExit warning is issued through
          // Python's `warnings` module before the handle is returned.
          const cl_command_queue data() const
          {
            if (m_finalized)
            {
              auto mod_warnings(py::module_::import_("warnings"));
              auto mod_cl(py::module_::import_("pyopencl"));

              mod_warnings.attr("warn")(
                  "Command queue used after exit of context manager. "
                  "This is deprecated and will stop working in 2023.",
                  mod_cl.attr("CommandQueueUsedAfterExit")
                  );
            }
            return m_queue;
          }
    
          // Marks this wrapper as no longer usable; data() warns and
          // finish() becomes a no-op from here on.
          void finalize()
          { m_finalized = true; }
    
    
          PYOPENCL_EQUALITY_TESTS(command_queue);
    
          // Fetches a command-queue info parameter as a Python object.
          // param_name values without a case below raise
          // error("CommandQueue.get_info", CL_INVALID_VALUE).
          //
          // NOTE(review): no case ends in break; the PYOPENCL_GET_*_INFO /
          // PYOPENCL_RETURN_VECTOR macros presumably return -- confirm.
          py::object get_info(cl_command_queue_info param_name) const
          {
            switch (param_name)
            {
              // Handle-valued parameters, wrapped in their proxy classes.
              case CL_QUEUE_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(
                    CommandQueue, m_queue, param_name, cl_context, context);
              case CL_QUEUE_DEVICE:
                PYOPENCL_GET_OPAQUE_INFO(
                    CommandQueue, m_queue, param_name, cl_device_id, device);

              // Plain scalar parameters.
              case CL_QUEUE_REFERENCE_COUNT:
                PYOPENCL_GET_TYPED_INFO(
                    CommandQueue, m_queue, param_name, cl_uint);
              case CL_QUEUE_PROPERTIES:
                PYOPENCL_GET_TYPED_INFO(
                    CommandQueue, m_queue, param_name, cl_command_queue_properties);

    #if PYOPENCL_CL_VERSION >= 0x2000
              case CL_QUEUE_SIZE:
                PYOPENCL_GET_TYPED_INFO(
                    CommandQueue, m_queue, param_name, cl_uint);
    #endif
    #if PYOPENCL_CL_VERSION >= 0x2010
              case CL_QUEUE_DEVICE_DEFAULT:
                PYOPENCL_GET_OPAQUE_INFO(
                    CommandQueue, m_queue, param_name, cl_command_queue, command_queue);
    #endif
    #if PYOPENCL_CL_VERSION >= 0x3000
              case CL_QUEUE_PROPERTIES_ARRAY:
                {
                  // This case goes through data() rather than m_queue.
                  std::vector<cl_queue_properties> result;
                  PYOPENCL_GET_VEC_INFO(CommandQueue, data(), param_name, result);
                  PYOPENCL_RETURN_VECTOR(cl_queue_properties, result);
                }
    #endif

              default:
                throw error("CommandQueue.get_info", CL_INVALID_VALUE);
            }
          }
    
    
          std::unique_ptr<context> get_context() const
    
          {
            cl_context param_value;
            PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo,
    
                (data(), CL_QUEUE_CONTEXT, sizeof(param_value), &param_value, 0));
    
            return std::unique_ptr<context>(
    
                new context(param_value, /*retain*/ true));
          }
    
    #if PYOPENCL_CL_VERSION < 0x1010
          // Calls clSetCommandQueueProperty to switch `prop` on or off and
          // returns the old-properties value that call reports.
          cl_command_queue_properties set_property(
              cl_command_queue_properties prop,
              bool enable)
          {
            cl_command_queue_properties previous;
            PYOPENCL_CALL_GUARDED(clSetCommandQueueProperty,
                (data(), prop, PYOPENCL_CAST_BOOL(enable), &previous));

            return previous;
          }
    #endif
    
          // Issues clFlush on this queue.
          void flush()
          {
            PYOPENCL_CALL_GUARDED(clFlush, (data()));
          }
    
          void finish()
    
            if (m_finalized) {
              return;
            } else {
              cl_command_queue queue = data();
    
              PYOPENCL_CALL_GUARDED_THREADED(clFinish, (queue));
            }
    
    
          // not exposed to python
          // Parses the CL_DEVICE_VERSION string ("OpenCL <major>.<minor> ...")
          // of this queue's device and returns it encoded as
          // major << 12 | minor << 4 (so 1.2 becomes 0x1020). Raises
          // error(CL_INVALID_VALUE) if the string does not match.
          int get_hex_device_version() const
          {
            cl_device_id dev;

            PYOPENCL_CALL_GUARDED(clGetCommandQueueInfo,
                (data(), CL_QUEUE_DEVICE, sizeof(dev), &dev, nullptr));

            std::string dev_version;
            {
              // Size query first, then fetch into a buffer of that size.
              size_t param_value_size;
              PYOPENCL_CALL_GUARDED(clGetDeviceInfo,
                  (dev, CL_DEVICE_VERSION, 0, nullptr, &param_value_size));

              std::vector<char> param_value(param_value_size);
              PYOPENCL_CALL_GUARDED(clGetDeviceInfo,
                  (dev, CL_DEVICE_VERSION, param_value_size,
                   param_value.empty( ) ? nullptr : &param_value.front(), &param_value_size));

              // The last reported byte is left out of the string.
              dev_version =
                  param_value.empty( ) ? "" : std::string(&param_value.front(), param_value_size-1);
            }

            int major_ver, minor_ver;
            errno = 0;
            int match_count = sscanf(dev_version.c_str(), "OpenCL %d.%d ", &major_ver, &minor_ver);
            if (errno || match_count != 2)
              throw error("CommandQueue._get_hex_device_version", CL_INVALID_VALUE,
                  "Device version string did not have expected format");

            return major_ver << 12 | minor_ver << 4;
          }
    
      // {{{ command_queue_ref
    
      // In contrast to command_queue, command_queue_ref is "nullable", i.e.
      // it is a RAII *optional* reference to a command queue.
    
      class command_queue_ref
      {
        private:
          bool m_valid;
          cl_command_queue m_queue;
    
        public:
          command_queue_ref()
            : m_valid(false)
          {}
    
          command_queue_ref(cl_command_queue queue)
            : m_valid(queue != nullptr), m_queue(queue)
          {
            // E.g. SVM allocations of size zero use a NULL queue. Tolerate that.
            if (m_valid)
              PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue));
          }
    
          command_queue_ref(command_queue_ref &&src) noexcept
            : m_valid(src.m_valid), m_queue(src.m_queue)
          {
            src.m_valid = false;
          }
    
          command_queue_ref(const command_queue_ref &src)
          : m_valid(src.m_valid), m_queue(src.m_queue)
          {
            // Note that there isn't anything per se wrong with this
            // copy constructor, the refcounting is just potentially
            // expensive.
            //
            // All code in current use moves these, it does not copy them,
            // so this should never get called.
            //
            // Unfortunately, we can't delete this copy constructor,
            // because we would like to return these from functions.
            // This makes at least gcc require copy constructors, even
            // if those are never called due to NRVO.
            std::cerr << "COPYING A COMMAND_QUEUE_REF." << std::endl;
    
            if (m_valid)
              PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue));
          }
    
          command_queue_ref &operator=(const command_queue_ref &) = delete;
    
          ~command_queue_ref()
          {
            reset();
          }
    
          bool is_valid() const
          {
            return m_valid;
          }
    
          cl_command_queue data() const
          {
            if (m_valid)
              return m_queue;
            else
              throw error("command_queue_ref.data", CL_INVALID_VALUE,
                  "command_queue_ref is not valid");
          }
    
          void reset()
          {
            if (m_valid)
              PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseCommandQueue, (m_queue));
            m_valid = false;
          }
    
          void set(cl_command_queue queue)
          {
            if (!queue)
              throw error("command_queue_ref.set", CL_INVALID_VALUE,
                  "cannot set to NULL command queue");
    
            if (m_valid)
              PYOPENCL_CALL_GUARDED(clReleaseCommandQueue, (m_queue));
            m_queue = queue;
            PYOPENCL_CALL_GUARDED(clRetainCommandQueue, (m_queue));
            m_valid = true;
          }
      };
    
      // }}}
    
    
    
      // {{{ event/synchronization
    
    
      class event : noncopyable
    
      {
        private:
          cl_event m_event;
    
        public:
          // Wraps an existing event handle; an extra reference is taken via
          // clRetainEvent only when `retain` is set.
          event(cl_event event, bool retain)
            : m_event(event)
          {
            if (!retain)
              return;

            PYOPENCL_CALL_GUARDED(clRetainEvent, (event));
          }
    
          // Copy: shares src's event handle and takes one more reference on
          // it.
          event(event const &src)
            : m_event(src.m_event)
          {
            PYOPENCL_CALL_GUARDED(clRetainEvent, (m_event));
          }
    
          // Gives up this wrapper's reference on the event, using the
          // cleanup variant of the guarded-call macro.
          virtual ~event()
          { PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseEvent, (m_event)); }
    
          // Raw event handle; no reference is added.
          const cl_event data() const
          {
            return m_event;
          }
    
          PYOPENCL_EQUALITY_TESTS(event);
    
          // Fetches an event info parameter as a Python object. param_name
          // values without a case below raise
          // error("Event.get_info", CL_INVALID_VALUE).
          //
          // NOTE(review): no case ends in break; the PYOPENCL_GET_*_INFO
          // macros presumably return -- confirm.
          py::object get_info(cl_event_info param_name) const
          {
            switch (param_name)
            {
              case CL_EVENT_COMMAND_QUEUE:
                PYOPENCL_GET_OPAQUE_INFO(
                    Event, m_event, param_name, cl_command_queue, command_queue);
              case CL_EVENT_COMMAND_TYPE:
                PYOPENCL_GET_TYPED_INFO(
                    Event, m_event, param_name, cl_command_type);
              case CL_EVENT_COMMAND_EXECUTION_STATUS:
                PYOPENCL_GET_TYPED_INFO(
                    Event, m_event, param_name, cl_int);
              case CL_EVENT_REFERENCE_COUNT:
                PYOPENCL_GET_TYPED_INFO(
                    Event, m_event, param_name, cl_uint);
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_EVENT_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(
                    Event, m_event, param_name, cl_context, context);
    #endif

              default:
                throw error("Event.get_info", CL_INVALID_VALUE);
            }
          }
    
          // Fetches one of the event's profiling counters, read as cl_ulong.
          // param_name values not listed below raise
          // error("Event.get_profiling_info", CL_INVALID_VALUE).
          py::object get_profiling_info(cl_profiling_info param_name) const
          {
            switch (param_name)
            {
              // All supported counters share the same value type, so the
              // case labels deliberately fall through to one query.
              case CL_PROFILING_COMMAND_QUEUED:
              case CL_PROFILING_COMMAND_SUBMIT:
              case CL_PROFILING_COMMAND_START:
              case CL_PROFILING_COMMAND_END:
    #if PYOPENCL_CL_VERSION >= 0x2000
              case CL_PROFILING_COMMAND_COMPLETE:
    #endif
                PYOPENCL_GET_TYPED_INFO(EventProfiling, m_event, param_name,
                    cl_ulong);
              default:
                throw error("Event.get_profiling_info", CL_INVALID_VALUE);
            }
          }
    
          // Waits on this single event with clWaitForEvents, via the
          // threaded variant of the guarded-call macro.
          virtual void wait()
          { PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (1, &m_event)); }
    
          // Variant of wait() meant for destructor contexts further down in
          // this file. Requirements for that setting:
          // - the GIL should not be released
          // - errors should be handled gracefully
          // It therefore goes through the cleanup variant of the
          // guarded-call macro rather than the threaded one.
          virtual void wait_during_cleanup_without_releasing_the_gil()
          { PYOPENCL_CALL_GUARDED_CLEANUP(clWaitForEvents, (1, &m_event)); }
    
    
    #if PYOPENCL_CL_VERSION >= 0x1010
        // {{{ set_callback, by way of a a thread-based construction
    
        private:
          struct event_callback_info_t
          {
            std::mutex m_mutex;
            std::condition_variable m_condvar;
    
    
            // FIXME: Should implement GC traversal so that these can be collected.
    
            py::object m_py_event;
            py::object m_py_callback;
    
            bool m_set_callback_suceeded;
    
    
            cl_event m_event;
            cl_int m_command_exec_status;
    
            event_callback_info_t(py::object py_event, py::object py_callback)
    
            : m_py_event(py_event), m_py_callback(py_callback), m_set_callback_suceeded(true),
            m_notify_thread_wakeup_is_genuine(false)
    
          static void CL_CALLBACK evt_callback(cl_event evt, cl_int command_exec_status, void *user_data)
    
          {
            event_callback_info_t *cb_info = reinterpret_cast<event_callback_info_t *>(user_data);
            {
              std::lock_guard<std::mutex> lg(cb_info->m_mutex);
              cb_info->m_event = evt;
              cb_info->m_command_exec_status = command_exec_status;
    
            cb_info->m_condvar.notify_one();
          }
    
        public:
          void set_callback(cl_int command_exec_callback_type, py::object pfn_event_notify)
          {
            // The reason for doing this via a thread is that we're able to wait on
            // acquiring the GIL. (which we can't in the callback)
    
            std::unique_ptr<event_callback_info_t> cb_info_holder(
                new event_callback_info_t(
                  handle_from_new_ptr(new event(*this)),
                  pfn_event_notify));
            event_callback_info_t *cb_info = cb_info_holder.get();
    
            std::thread notif_thread([cb_info]()
                {
    
                  {
                    std::unique_lock<std::mutex> ulk(cb_info->m_mutex);
    
                    cb_info->m_condvar.wait(
                        ulk,
                        [&](){ return cb_info->m_notify_thread_wakeup_is_genuine; });
    
    
                    // ulk no longer held here, cb_info ready for deletion
                  }
    
    
                  {
                    py::gil_scoped_acquire acquire;
    
                    if (cb_info->m_set_callback_suceeded)
                    {
                      try {
                        cb_info->m_py_callback(
                            // cb_info->m_py_event,
                            cb_info->m_command_exec_status);
                      }
                      catch (std::exception &exc)
                      {
                        std::cerr
                        << "[pyopencl] event callback handler threw an exception, ignoring: "
                        << exc.what()
                        << std::endl;
                      }
                    }
    
                    // Need to hold GIL to delete py::object instances in
                    // event_callback_info_t
                    delete cb_info;
                  }
                });
            // Thread is away--it is now its responsibility to free cb_info.
            cb_info_holder.release();
    
            // notif_thread should no longer be coupled to the lifetime of the thread.
            notif_thread.detach();
    
            try
            {
              PYOPENCL_CALL_GUARDED(clSetEventCallback, (
                    data(), command_exec_callback_type, &event::evt_callback, cb_info));
            }
            catch (...) {
              // Setting the callback did not succeed. The thread would never
              // be woken up. Wake it up to let it know that it can stop.
              {
                std::lock_guard<std::mutex> lg(cb_info->m_mutex);
                cb_info->m_set_callback_suceeded = false;
    
              }
              cb_info->m_condvar.notify_one();
              throw;
            }
          }