Skip to content
Snippets Groups Projects
wrap_cl.hpp 122 KiB
Newer Older
              PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
                  PYOPENCL_FIRST_ARG, param_name, result);

              PYOPENCL_RETURN_VECTOR(size_t, result);
            }
          case CL_KERNEL_LOCAL_MEM_SIZE:
#if PYOPENCL_CL_VERSION >= 0x1010
          case CL_KERNEL_PRIVATE_MEM_SIZE:
#endif
            PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                PYOPENCL_FIRST_ARG, param_name,
                cl_ulong);

#if PYOPENCL_CL_VERSION >= 0x1010
          case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
            PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                PYOPENCL_FIRST_ARG, param_name,
                size_t);
#endif
          default:
            throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
#undef PYOPENCL_FIRST_ARG
        }
      }

#if PYOPENCL_CL_VERSION >= 0x1020
      py::object get_arg_info(
          cl_uint arg_index,
          cl_kernel_arg_info param_name
          ) const
      {
        switch (param_name)
        {
#define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
          case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
            PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                PYOPENCL_FIRST_ARG, param_name,
                cl_kernel_arg_address_qualifier);

          case CL_KERNEL_ARG_ACCESS_QUALIFIER:
            PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                PYOPENCL_FIRST_ARG, param_name,
                cl_kernel_arg_access_qualifier);

          case CL_KERNEL_ARG_TYPE_NAME:
          case CL_KERNEL_ARG_NAME:
            PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);
#undef PYOPENCL_FIRST_ARG
          default:
            throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
        }
      }
#endif
  };


  inline
  py::list create_kernels_in_program(program &pgm)
  {
    cl_uint num_kernels;
    PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
          pgm.data(), 0, 0, &num_kernels));

    std::vector<cl_kernel> kernels(num_kernels);
    PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
          pgm.data(), num_kernels,
Andreas Klöckner's avatar
Andreas Klöckner committed
          kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels));
    for (cl_kernel knl: kernels)
      result.append(handle_from_new_ptr(new kernel(knl, true)));

    return result;
  }



  inline
  event *enqueue_nd_range_kernel(
      command_queue &cq,
      kernel &knl,
      py::object py_global_work_size,
      py::object py_local_work_size,
      py::object py_global_work_offset,
      py::object py_wait_for,
      bool g_times_l)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    cl_uint work_dim = len(py_global_work_size);

    std::vector<size_t> global_work_size;
    COPY_PY_LIST(size_t, global_work_size);

    size_t *local_work_size_ptr = 0;
    std::vector<size_t> local_work_size;
    if (py_local_work_size.ptr() != Py_None)
    {
      if (g_times_l)
        work_dim = std::max(work_dim, unsigned(len(py_local_work_size)));
      else
        if (work_dim != unsigned(len(py_local_work_size)))
          throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
              "global/local work sizes have differing dimensions");

      COPY_PY_LIST(size_t, local_work_size);

      while (local_work_size.size() < work_dim)
        local_work_size.push_back(1);
      while (global_work_size.size() < work_dim)
        global_work_size.push_back(1);

Andreas Klöckner's avatar
Andreas Klöckner committed
      local_work_size_ptr = local_work_size.empty( ) ? nullptr : &local_work_size.front();
    }

    if (g_times_l && local_work_size_ptr)
    {
      for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
        global_work_size[work_axis] *= local_work_size[work_axis];
    }

    size_t *global_work_offset_ptr = 0;
    std::vector<size_t> global_work_offset;
    if (py_global_work_offset.ptr() != Py_None)
    {
      if (work_dim != unsigned(len(py_global_work_offset)))
        throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
            "global work size and offset have differing dimensions");

      COPY_PY_LIST(size_t, global_work_offset);

      if (g_times_l && local_work_size_ptr)
      {
        for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
          global_work_offset[work_axis] *= local_work_size[work_axis];
      }

Andreas Klöckner's avatar
Andreas Klöckner committed
      global_work_offset_ptr = global_work_offset.empty( ) ? nullptr :  &global_work_offset.front();
    }

    PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
          cl_event evt;
          PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, (
                cq.data(),
                knl.data(),
                work_dim,
                global_work_offset_ptr,
Andreas Klöckner's avatar
Andreas Klöckner committed
                global_work_size.empty( ) ? nullptr : &global_work_size.front(),
                local_work_size_ptr,
                PYOPENCL_WAITLIST_ARGS, &evt
                ));
          PYOPENCL_RETURN_NEW_EVENT(evt);
        } );
  }

  // }}}

  // {{{ gl interop
  inline
  bool have_gl()
  {
#ifdef HAVE_GL
    return true;
#else
    return false;
#endif
  }




#ifdef HAVE_GL

#ifdef __APPLE__
  inline
  cl_context_properties get_apple_cgl_share_group()
  {
    CGLContextObj kCGLContext = CGLGetCurrentContext();
    CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);

    return (cl_context_properties) kCGLShareGroup;
  }
#endif /* __APPLE__ */




  class gl_buffer : public memory_object
  {
    public:
      gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
      { }
  };




  class gl_renderbuffer : public memory_object
  {
    public:
      gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
      { }
  };




  class gl_texture : public image
  {
    public:
      gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
      { }

      py::object get_gl_texture_info(cl_gl_texture_info param_name)
      {
        switch (param_name)
        {
          case CL_GL_TEXTURE_TARGET:
            PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum);
          case CL_GL_MIPMAP_LEVEL:
            PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint);

          default:
            throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE);
        }
      }
  };




#define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \
  inline \
  TYPE *NAME ARGS \
  { \
    cl_int status_code; \
    PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \
    cl_mem mem = CL_NAME CL_ARGS; \
    \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#CL_NAME, status_code); \
    \
    try \
    { \
      return new TYPE(mem, false); \
    } \
    catch (...) \
    { \
      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \
      throw; \
    } \
  }




  PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer,
      create_from_gl_buffer, clCreateFromGLBuffer,
      (context &ctx, cl_mem_flags flags, GLuint bufobj),
      (ctx.data(), flags, bufobj, &status_code));
  PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
      create_from_gl_texture_2d, clCreateFromGLTexture2D,
      (context &ctx, cl_mem_flags flags,
         GLenum texture_target, GLint miplevel, GLuint texture),
      (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
  PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
      create_from_gl_texture_3d, clCreateFromGLTexture3D,
      (context &ctx, cl_mem_flags flags,
         GLenum texture_target, GLint miplevel, GLuint texture),
      (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
  PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer,
      create_from_gl_renderbuffer, clCreateFromGLRenderbuffer,
      (context &ctx, cl_mem_flags flags, GLuint renderbuffer),
      (ctx.data(), flags, renderbuffer, &status_code));

  inline
  gl_texture *create_from_gl_texture(
      context &ctx, cl_mem_flags flags,
      GLenum texture_target, GLint miplevel,
      GLuint texture, unsigned dims)
  {
    if (dims == 2)
      return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture);
    else if (dims == 3)
      return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture);
    else
      throw pyopencl::error("Image", CL_INVALID_VALUE,
          "invalid dimension");
  }





  inline
  py::tuple get_gl_object_info(memory_object_holder const &mem)
  {
    cl_gl_object_type otype;
    GLuint gl_name;
    PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name));
    return py::make_tuple(otype, gl_name);
  }

#define WRAP_GL_ENQUEUE(what, What) \
  inline \
  event *enqueue_##what##_gl_objects( \
      command_queue &cq, \
      py::object py_mem_objects, \
      py::object py_wait_for) \
  { \
    PYOPENCL_PARSE_WAIT_FOR; \
    \
    std::vector<cl_mem> mem_objects; \
    for (py::handle mo: py_mem_objects) \
      mem_objects.push_back((mo).cast<memory_object_holder &>().data()); \
    \
    cl_event evt; \
    PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \
          cq.data(), \
Andreas Klöckner's avatar
Andreas Klöckner committed
          mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), \
          PYOPENCL_WAITLIST_ARGS, &evt \
          )); \
    \
    PYOPENCL_RETURN_NEW_EVENT(evt); \
  }

  WRAP_GL_ENQUEUE(acquire, Acquire);
  WRAP_GL_ENQUEUE(release, Release);
#endif




#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
  inline
  py::object get_gl_context_info_khr(
      py::object py_properties,
      cl_gl_context_info param_name,
      py::object py_platform
      )
  {
    std::vector<cl_context_properties> props
      = parse_context_properties(py_properties);

    typedef CL_API_ENTRY cl_int (CL_API_CALL
      *func_ptr_type)(const cl_context_properties * /* properties */,
          cl_gl_context_info            /* param_name */,
          size_t                        /* param_value_size */,
          void *                        /* param_value */,
          size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

    func_ptr_type func_ptr;

#if PYOPENCL_CL_VERSION >= 0x1020
    if (py_platform.ptr() != Py_None)
    {
      platform &plat = (py_platform).cast<platform &>();

      func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform(
            plat.data(), "clGetGLContextInfoKHR");
    }
    else
    {
      PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", );

      func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
            "clGetGLContextInfoKHR");
    }
#else
    func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
          "clGetGLContextInfoKHR");
#endif


    if (!func_ptr)
      throw error("Context.get_info", CL_INVALID_PLATFORM,
          "clGetGLContextInfoKHR extension function not present");

    cl_context_properties *props_ptr
Andreas Klöckner's avatar
Andreas Klöckner committed
      = props.empty( ) ? nullptr : &props.front();

    switch (param_name)
    {
      case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR:
        {
          cl_device_id param_value;
          PYOPENCL_CALL_GUARDED(func_ptr,
              (props_ptr, param_name, sizeof(param_value), &param_value, 0));
          return py::object(handle_from_new_ptr( \
                new device(param_value, /*retain*/ true)));
        }

      case CL_DEVICES_FOR_GL_CONTEXT_KHR:
        {
          size_t size;
          PYOPENCL_CALL_GUARDED(func_ptr,
              (props_ptr, param_name, 0, 0, &size));

          std::vector<cl_device_id> devices;

          devices.resize(size / sizeof(devices.front()));

          PYOPENCL_CALL_GUARDED(func_ptr,
              (props_ptr, param_name, size,
Andreas Klöckner's avatar
Andreas Klöckner committed
               devices.empty( ) ? nullptr : &devices.front(), &size));
          for (cl_device_id did: devices)
            result.append(handle_from_new_ptr(
                  new device(did)));

          return result;
        }

      default:
        throw error("get_gl_context_info_khr", CL_INVALID_VALUE);
    }
  }

#endif

  // }}}

  // {{{ deferred implementation bits

  inline py::object create_mem_object_wrapper(cl_mem mem)
  {
    cl_mem_object_type mem_obj_type;
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
        (mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0));

    switch (mem_obj_type)
    {
      case CL_MEM_OBJECT_BUFFER:
        return py::object(handle_from_new_ptr(
              new buffer(mem, /*retain*/ true)));
      case CL_MEM_OBJECT_IMAGE2D:
      case CL_MEM_OBJECT_IMAGE3D:
#if PYOPENCL_CL_VERSION >= 0x1020
      case CL_MEM_OBJECT_IMAGE2D_ARRAY:
      case CL_MEM_OBJECT_IMAGE1D:
      case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      case CL_MEM_OBJECT_IMAGE1D_BUFFER:
#endif
        return py::object(handle_from_new_ptr(
              new image(mem, /*retain*/ true)));
      default:
        return py::object(handle_from_new_ptr(
              new memory_object(mem, /*retain*/ true)));
    }
  }

  inline
  py::object memory_object_from_int(intptr_t cl_mem_as_int)
  {
    return create_mem_object_wrapper((cl_mem) cl_mem_as_int);
  }


  inline
  py::object memory_object_holder::get_info(cl_mem_info param_name) const
  {
    switch (param_name)
    {
      case CL_MEM_TYPE:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            cl_mem_object_type);
      case CL_MEM_FLAGS:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            cl_mem_flags);
      case CL_MEM_SIZE:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            size_t);
      case CL_MEM_HOST_PTR:
        throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE,
            "Use MemoryObject.get_host_array to get host pointer.");
      case CL_MEM_MAP_COUNT:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            cl_uint);
      case CL_MEM_REFERENCE_COUNT:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            cl_uint);
      case CL_MEM_CONTEXT:
        PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name,
            cl_context, context);

#if PYOPENCL_CL_VERSION >= 0x1010
      case CL_MEM_ASSOCIATED_MEMOBJECT:
        {
          cl_mem param_value;
          PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
              (data(), param_name, sizeof(param_value), &param_value, 0));
          if (param_value == 0)
          {
            // no associated memory object? no problem.
            return py::none();
          }

          return create_mem_object_wrapper(param_value);
        }
      case CL_MEM_OFFSET:
        PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
            size_t);
#endif

      default:
        throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE);
    }
  }

  inline
  py::object get_mem_obj_host_array(
      py::object mem_obj_py,
      py::object shape, py::object dtype,
      py::object order_py)
  {
    memory_object_holder const &mem_obj =
      (mem_obj_py).cast<memory_object_holder const &>();
    PyArray_Descr *tp_descr;
    if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED)
      throw py::error_already_set();
    cl_mem_flags mem_flags;
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0));
    if (!(mem_flags & CL_MEM_USE_HOST_PTR))
      throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE,
                            "Only MemoryObject with USE_HOST_PTR "
                            "is supported.");

    std::vector<npy_intp> dims;
    try
    {
      dims.push_back(py::cast<npy_intp>(shape));
    }
    catch (py::cast_error &)
    {
      for (auto it: shape)
        dims.push_back(it.cast<npy_intp>());
    }

    NPY_ORDER order = PyArray_CORDER;
    PyArray_OrderConverter(order_py.ptr(), &order);

    int ary_flags = 0;
    if (order == PyArray_FORTRANORDER)
      ary_flags |= NPY_FARRAY;
    else if (order == PyArray_CORDER)
      ary_flags |= NPY_CARRAY;
    else
      throw std::runtime_error("unrecognized order specifier");

    void *host_ptr;
    size_t mem_obj_size;
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr),
         &host_ptr, 0));
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size),
         &mem_obj_size, 0));

    py::object result = py::reinterpret_steal<py::object>(PyArray_NewFromDescr(
        &PyArray_Type, tp_descr,
Andreas Klöckner's avatar
Andreas Klöckner committed
        dims.size(), &dims.front(), /*strides*/ nullptr,
        host_ptr, ary_flags, /*obj*/nullptr));
    if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size)
      throw pyopencl::error("MemoryObject.get_host_array",
          CL_INVALID_VALUE,
          "Resulting array is larger than memory object.");

    PyArray_BASE(result.ptr()) = mem_obj_py.ptr();
    Py_INCREF(mem_obj_py.ptr());

    return result;
  }

  // }}}
}

#endif

// vim: foldmethod=marker