Skip to content
Snippets Groups Projects
wrap_cl.hpp 124 KiB
Newer Older
  • Learn to ignore specific revisions
  •           set_arg_null(arg_index);
              return;
            }
    
    
              set_arg_mem(arg_index, arg.cast<memory_object_holder &>());
    
            catch (py::cast_error &) { }
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
    #if PYOPENCL_CL_VERSION >= 0x2000
            try
            {
              set_arg_svm(arg_index, arg.cast<svm_arg_wrapper const &>());
              return;
            }
            catch (py::cast_error &) { }
    #endif
    
    
              set_arg_local(arg_index, arg.cast<local_memory>());
    
            catch (py::cast_error &) { }
    
              set_arg_sampler(arg_index, arg.cast<const sampler &>());
    
            catch (py::cast_error &) { }
    
    
            set_arg_buf(arg_index, arg);
          }
    
          py::object get_info(cl_kernel_info param_name) const
          {
            switch (param_name)
            {
              case CL_KERNEL_FUNCTION_NAME:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
              case CL_KERNEL_NUM_ARGS:
              case CL_KERNEL_REFERENCE_COUNT:
                PYOPENCL_GET_INTEGRAL_INFO(Kernel, m_kernel, param_name,
                    cl_uint);
              case CL_KERNEL_CONTEXT:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_context, context);
              case CL_KERNEL_PROGRAM:
                PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
                    cl_program, program);
    #if PYOPENCL_CL_VERSION >= 0x1020
              case CL_KERNEL_ATTRIBUTES:
                PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
    #endif
              default:
                throw error("Kernel.get_info", CL_INVALID_VALUE);
            }
          }
    
          py::object get_work_group_info(
              cl_kernel_work_group_info param_name,
              device const &dev
              ) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack
              case CL_KERNEL_WORK_GROUP_SIZE:
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
              case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
                {
                  std::vector<size_t> result;
                  PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
                      PYOPENCL_FIRST_ARG, param_name, result);
    
                  PYOPENCL_RETURN_VECTOR(size_t, result);
                }
              case CL_KERNEL_LOCAL_MEM_SIZE:
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PRIVATE_MEM_SIZE:
    #endif
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_ulong);
    
    #if PYOPENCL_CL_VERSION >= 0x1010
              case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
                PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
                    PYOPENCL_FIRST_ARG, param_name,
                    size_t);
    #endif
              default:
                throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
    #undef PYOPENCL_FIRST_ARG
            }
          }
    
    #if PYOPENCL_CL_VERSION >= 0x1020
          py::object get_arg_info(
              cl_uint arg_index,
              cl_kernel_arg_info param_name
              ) const
          {
            switch (param_name)
            {
    #define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
              case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
                PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_address_qualifier);
    
              case CL_KERNEL_ARG_ACCESS_QUALIFIER:
                PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
                    PYOPENCL_FIRST_ARG, param_name,
                    cl_kernel_arg_access_qualifier);
    
              case CL_KERNEL_ARG_TYPE_NAME:
              case CL_KERNEL_ARG_NAME:
                PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);
    #undef PYOPENCL_FIRST_ARG
              default:
                throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
            }
          }
    #endif
      };
    
    
      inline
      py::list create_kernels_in_program(program &pgm)
      {
        cl_uint num_kernels;
        PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
              pgm.data(), 0, 0, &num_kernels));
    
        std::vector<cl_kernel> kernels(num_kernels);
        PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
              pgm.data(), num_kernels,
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
              kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels));
    
        for (cl_kernel knl: kernels)
    
          result.append(handle_from_new_ptr(new kernel(knl, true)));
    
        return result;
      }
    
    
    
      inline
      event *enqueue_nd_range_kernel(
          command_queue &cq,
          kernel &knl,
          py::object py_global_work_size,
          py::object py_local_work_size,
          py::object py_global_work_offset,
          py::object py_wait_for,
          bool g_times_l)
      {
        PYOPENCL_PARSE_WAIT_FOR;
    
        cl_uint work_dim = len(py_global_work_size);
    
        std::vector<size_t> global_work_size;
        COPY_PY_LIST(size_t, global_work_size);
    
        size_t *local_work_size_ptr = 0;
        std::vector<size_t> local_work_size;
        if (py_local_work_size.ptr() != Py_None)
        {
          if (g_times_l)
            work_dim = std::max(work_dim, unsigned(len(py_local_work_size)));
          else
            if (work_dim != unsigned(len(py_local_work_size)))
              throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
                  "global/local work sizes have differing dimensions");
    
          COPY_PY_LIST(size_t, local_work_size);
    
          while (local_work_size.size() < work_dim)
            local_work_size.push_back(1);
          while (global_work_size.size() < work_dim)
            global_work_size.push_back(1);
    
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
          local_work_size_ptr = local_work_size.empty( ) ? nullptr : &local_work_size.front();
    
        }
    
        if (g_times_l && local_work_size_ptr)
        {
          for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
            global_work_size[work_axis] *= local_work_size[work_axis];
        }
    
        size_t *global_work_offset_ptr = 0;
        std::vector<size_t> global_work_offset;
        if (py_global_work_offset.ptr() != Py_None)
        {
          if (work_dim != unsigned(len(py_global_work_offset)))
            throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
                "global work size and offset have differing dimensions");
    
          COPY_PY_LIST(size_t, global_work_offset);
    
          if (g_times_l && local_work_size_ptr)
          {
            for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
              global_work_offset[work_axis] *= local_work_size[work_axis];
          }
    
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
          global_work_offset_ptr = global_work_offset.empty( ) ? nullptr :  &global_work_offset.front();
    
        }
    
        PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
              cl_event evt;
              PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, (
                    cq.data(),
                    knl.data(),
                    work_dim,
                    global_work_offset_ptr,
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
                    global_work_size.empty( ) ? nullptr : &global_work_size.front(),
    
                    local_work_size_ptr,
                    PYOPENCL_WAITLIST_ARGS, &evt
                    ));
              PYOPENCL_RETURN_NEW_EVENT(evt);
            } );
      }
    
      // }}}
    
    
      // {{{ gl interop
      inline
      bool have_gl()
      {
    #ifdef HAVE_GL
        return true;
    #else
        return false;
    #endif
      }
    
    
    
    
    #ifdef HAVE_GL
    
    #ifdef __APPLE__
      inline
      cl_context_properties get_apple_cgl_share_group()
      {
        CGLContextObj kCGLContext = CGLGetCurrentContext();
        CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
    
        return (cl_context_properties) kCGLShareGroup;
      }
    #endif /* __APPLE__ */
    
    
    
    
      class gl_buffer : public memory_object
      {
        public:
          gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
    
            : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
    
          { }
      };
    
    
    
    
      class gl_renderbuffer : public memory_object
      {
        public:
          gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
    
            : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
    
          { }
      };
    
    
    
    
      class gl_texture : public image
      {
        public:
          gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
    
            : image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
    
          { }
    
          py::object get_gl_texture_info(cl_gl_texture_info param_name)
          {
            switch (param_name)
            {
              case CL_GL_TEXTURE_TARGET:
                PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum);
              case CL_GL_MIPMAP_LEVEL:
                PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint);
    
              default:
                throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE);
            }
          }
      };
    
    
    
    
    #define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \
      inline \
      TYPE *NAME ARGS \
      { \
        cl_int status_code; \
        PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \
        cl_mem mem = CL_NAME CL_ARGS; \
        \
        if (status_code != CL_SUCCESS) \
          throw pyopencl::error(#CL_NAME, status_code); \
        \
        try \
        { \
          return new TYPE(mem, false); \
        } \
        catch (...) \
        { \
          PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \
          throw; \
        } \
      }
    
    
    
    
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer,
          create_from_gl_buffer, clCreateFromGLBuffer,
          (context &ctx, cl_mem_flags flags, GLuint bufobj),
          (ctx.data(), flags, bufobj, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
          create_from_gl_texture_2d, clCreateFromGLTexture2D,
          (context &ctx, cl_mem_flags flags,
             GLenum texture_target, GLint miplevel, GLuint texture),
          (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
          create_from_gl_texture_3d, clCreateFromGLTexture3D,
          (context &ctx, cl_mem_flags flags,
             GLenum texture_target, GLint miplevel, GLuint texture),
          (ctx.data(), flags, texture_target, miplevel, texture, &status_code));
      PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer,
          create_from_gl_renderbuffer, clCreateFromGLRenderbuffer,
          (context &ctx, cl_mem_flags flags, GLuint renderbuffer),
          (ctx.data(), flags, renderbuffer, &status_code));
    
      inline
      gl_texture *create_from_gl_texture(
          context &ctx, cl_mem_flags flags,
          GLenum texture_target, GLint miplevel,
          GLuint texture, unsigned dims)
      {
        if (dims == 2)
          return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture);
        else if (dims == 3)
          return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture);
        else
          throw pyopencl::error("Image", CL_INVALID_VALUE,
              "invalid dimension");
      }
    
    
    
    
    
      inline
      py::tuple get_gl_object_info(memory_object_holder const &mem)
      {
        cl_gl_object_type otype;
        GLuint gl_name;
        PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name));
        return py::make_tuple(otype, gl_name);
      }
    
    #define WRAP_GL_ENQUEUE(what, What) \
      inline \
      event *enqueue_##what##_gl_objects( \
          command_queue &cq, \
          py::object py_mem_objects, \
          py::object py_wait_for) \
      { \
        PYOPENCL_PARSE_WAIT_FOR; \
        \
        std::vector<cl_mem> mem_objects; \
    
        for (py::handle mo: py_mem_objects) \
    
          mem_objects.push_back((mo).cast<memory_object_holder &>().data()); \
    
        \
        cl_event evt; \
        PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \
              cq.data(), \
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
              mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), \
    
              PYOPENCL_WAITLIST_ARGS, &evt \
              )); \
        \
        PYOPENCL_RETURN_NEW_EVENT(evt); \
      }
    
      WRAP_GL_ENQUEUE(acquire, Acquire);
      WRAP_GL_ENQUEUE(release, Release);
    #endif
    
    
    
    
    #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
      inline
      py::object get_gl_context_info_khr(
          py::object py_properties,
          cl_gl_context_info param_name,
          py::object py_platform
          )
      {
        std::vector<cl_context_properties> props
          = parse_context_properties(py_properties);
    
        typedef CL_API_ENTRY cl_int (CL_API_CALL
          *func_ptr_type)(const cl_context_properties * /* properties */,
              cl_gl_context_info            /* param_name */,
              size_t                        /* param_value_size */,
              void *                        /* param_value */,
              size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
    
        func_ptr_type func_ptr;
    
    #if PYOPENCL_CL_VERSION >= 0x1020
        if (py_platform.ptr() != Py_None)
        {
    
          platform &plat = (py_platform).cast<platform &>();
    
    
          func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform(
                plat.data(), "clGetGLContextInfoKHR");
        }
        else
        {
          PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", );
    
          func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
                "clGetGLContextInfoKHR");
        }
    #else
        func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
              "clGetGLContextInfoKHR");
    #endif
    
    
        if (!func_ptr)
          throw error("Context.get_info", CL_INVALID_PLATFORM,
              "clGetGLContextInfoKHR extension function not present");
    
        cl_context_properties *props_ptr
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
          = props.empty( ) ? nullptr : &props.front();
    
    
        switch (param_name)
        {
          case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR:
            {
              cl_device_id param_value;
              PYOPENCL_CALL_GUARDED(func_ptr,
                  (props_ptr, param_name, sizeof(param_value), &param_value, 0));
              return py::object(handle_from_new_ptr( \
                    new device(param_value, /*retain*/ true)));
            }
    
          case CL_DEVICES_FOR_GL_CONTEXT_KHR:
            {
              size_t size;
              PYOPENCL_CALL_GUARDED(func_ptr,
                  (props_ptr, param_name, 0, 0, &size));
    
              std::vector<cl_device_id> devices;
    
              devices.resize(size / sizeof(devices.front()));
    
              PYOPENCL_CALL_GUARDED(func_ptr,
                  (props_ptr, param_name, size,
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
                   devices.empty( ) ? nullptr : &devices.front(), &size));
    
              for (cl_device_id did: devices)
    
                result.append(handle_from_new_ptr(
                      new device(did)));
    
              return result;
            }
    
          default:
            throw error("get_gl_context_info_khr", CL_INVALID_VALUE);
        }
      }
    
    #endif
    
      // }}}
    
    
      // {{{ deferred implementation bits
    
      inline py::object create_mem_object_wrapper(cl_mem mem)
      {
        cl_mem_object_type mem_obj_type;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
            (mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0));
    
        switch (mem_obj_type)
        {
          case CL_MEM_OBJECT_BUFFER:
            return py::object(handle_from_new_ptr(
                  new buffer(mem, /*retain*/ true)));
          case CL_MEM_OBJECT_IMAGE2D:
          case CL_MEM_OBJECT_IMAGE3D:
    #if PYOPENCL_CL_VERSION >= 0x1020
          case CL_MEM_OBJECT_IMAGE2D_ARRAY:
          case CL_MEM_OBJECT_IMAGE1D:
          case CL_MEM_OBJECT_IMAGE1D_ARRAY:
          case CL_MEM_OBJECT_IMAGE1D_BUFFER:
    #endif
            return py::object(handle_from_new_ptr(
                  new image(mem, /*retain*/ true)));
          default:
            return py::object(handle_from_new_ptr(
                  new memory_object(mem, /*retain*/ true)));
        }
      }
    
      inline
      py::object memory_object_from_int(intptr_t cl_mem_as_int)
      {
        return create_mem_object_wrapper((cl_mem) cl_mem_as_int);
      }
    
    
      inline
      py::object memory_object_holder::get_info(cl_mem_info param_name) const
      {
        switch (param_name)
        {
          case CL_MEM_TYPE:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                cl_mem_object_type);
          case CL_MEM_FLAGS:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                cl_mem_flags);
          case CL_MEM_SIZE:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                size_t);
          case CL_MEM_HOST_PTR:
            throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE,
                "Use MemoryObject.get_host_array to get host pointer.");
          case CL_MEM_MAP_COUNT:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                cl_uint);
          case CL_MEM_REFERENCE_COUNT:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                cl_uint);
          case CL_MEM_CONTEXT:
            PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name,
                cl_context, context);
    
    #if PYOPENCL_CL_VERSION >= 0x1010
          case CL_MEM_ASSOCIATED_MEMOBJECT:
            {
              cl_mem param_value;
              PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
                  (data(), param_name, sizeof(param_value), &param_value, 0));
              if (param_value == 0)
              {
                // no associated memory object? no problem.
    
                return py::none();
    
              }
    
              return create_mem_object_wrapper(param_value);
            }
          case CL_MEM_OFFSET:
            PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
                size_t);
    #endif
    
          default:
            throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE);
        }
      }
    
      inline
    
      py::object get_mem_obj_host_array(
    
          py::object mem_obj_py,
          py::object shape, py::object dtype,
          py::object order_py)
      {
        memory_object_holder const &mem_obj =
    
          (mem_obj_py).cast<memory_object_holder const &>();
    
        PyArray_Descr *tp_descr;
        if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED)
          throw py::error_already_set();
        cl_mem_flags mem_flags;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
                (mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0));
        if (!(mem_flags & CL_MEM_USE_HOST_PTR))
          throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE,
                                "Only MemoryObject with USE_HOST_PTR "
                                "is supported.");
    
        std::vector<npy_intp> dims;
    
        try
        {
          dims.push_back(py::cast<npy_intp>(shape));
        }
        catch (py::cast_error &)
        {
          for (auto it: shape)
            dims.push_back(it.cast<npy_intp>());
        }
    
    
        NPY_ORDER order = PyArray_CORDER;
        PyArray_OrderConverter(order_py.ptr(), &order);
    
        int ary_flags = 0;
        if (order == PyArray_FORTRANORDER)
          ary_flags |= NPY_FARRAY;
        else if (order == PyArray_CORDER)
          ary_flags |= NPY_CARRAY;
        else
          throw std::runtime_error("unrecognized order specifier");
    
        void *host_ptr;
        size_t mem_obj_size;
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr),
             &host_ptr, 0));
        PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
            (mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size),
             &mem_obj_size, 0));
    
    
        py::object result = py::reinterpret_steal<py::object>(PyArray_NewFromDescr(
    
            &PyArray_Type, tp_descr,
    
    Andreas Klöckner's avatar
    Andreas Klöckner committed
            dims.size(), &dims.front(), /*strides*/ nullptr,
            host_ptr, ary_flags, /*obj*/nullptr));
    
        if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size)
    
          throw pyopencl::error("MemoryObject.get_host_array",
              CL_INVALID_VALUE,
              "Resulting array is larger than memory object.");
    
    
        PyArray_BASE(result.ptr()) = mem_obj_py.ptr();
    
        Py_INCREF(mem_obj_py.ptr());
    
        return result;
      }
    
      // }}}
    }
    
    #endif
    
    // vim: foldmethod=marker