Skip to content
Snippets Groups Projects
wrap_cl.hpp 115 KiB
Newer Older
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_copy_buffer(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dst,
      ptrdiff_t byte_count,
      size_t src_offset,
      size_t dst_offset,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    if (byte_count < 0)
    {
      size_t byte_count_src = 0;
      size_t byte_count_dst = 0;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0));
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0));
      byte_count = std::min(byte_count_src, byte_count_dst);
    }

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, (
            cq.data(),
            src.data(), dst.data(),
            src_offset, dst_offset,
            byte_count,
            PYOPENCL_WAITLIST_ARGS,
            &evt
            ))
      );

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

  // }}}

  // {{{ rectangular transfers
#if PYOPENCL_CL_VERSION >= 0x1010
  inline
  event *enqueue_read_buffer_rect(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      py::object py_buffer_origin,
      py::object py_host_origin,
      py::object py_region,
      py::sequence py_buffer_pitches,
      py::sequence py_host_pitches,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(buffer_origin);
    COPY_PY_COORD_TRIPLE(host_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(buffer_pitches);
    COPY_PY_PITCH_TUPLE(host_pitches);

    void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;

    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            buffer_origin, host_origin, region,
            buffer_pitches[0], buffer_pitches[1],
            host_pitches[0], host_pitches[1],
            buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_write_buffer_rect(
      command_queue &cq,
      memory_object_holder &mem,
      py::object buffer,
      py::object py_buffer_origin,
      py::object py_host_origin,
      py::object py_region,
      py::sequence py_buffer_pitches,
      py::sequence py_host_pitches,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(buffer_origin);
    COPY_PY_COORD_TRIPLE(host_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(buffer_pitches);
    COPY_PY_PITCH_TUPLE(host_pitches);

    const void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;
    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, (
            cq.data(),
            mem.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            buffer_origin, host_origin, region,
            buffer_pitches[0], buffer_pitches[1],
            host_pitches[0], host_pitches[1],
            buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_copy_buffer_rect(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dst,
      py::object py_src_origin,
      py::object py_dst_origin,
      py::object py_region,
      py::sequence py_src_pitches,
      py::sequence py_dst_pitches,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(src_origin);
    COPY_PY_COORD_TRIPLE(dst_origin);
    COPY_PY_REGION_TRIPLE(region);
    COPY_PY_PITCH_TUPLE(src_pitches);
    COPY_PY_PITCH_TUPLE(dst_pitches);

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, (
            cq.data(),
            src.data(), dst.data(),
            src_origin, dst_origin, region,
            src_pitches[0], src_pitches[1],
            dst_pitches[0], dst_pitches[1],
            PYOPENCL_WAITLIST_ARGS,
            &evt
            ))
      );

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

#endif

  // }}}

  // }}}

#if PYOPENCL_CL_VERSION >= 0x1020
  inline
  event *enqueue_fill_buffer(
      command_queue &cq,
      memory_object_holder &mem,
      py::object pattern,
      size_t offset,
      size_t size,
      py::object py_wait_for
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;

    const void *pattern_buf;
    PYOPENCL_BUFFER_SIZE_T pattern_len;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS);

    pattern_buf = ward->m_buf.buf;
    pattern_len = ward->m_buf.len;
#else
    if (PyObject_AsReadBuffer(pattern.ptr(), &pattern_buf, &pattern_len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueFillBuffer, (
            cq.data(),
            mem.data(),
            pattern_buf, pattern_len, offset, size,
            PYOPENCL_WAITLIST_ARGS, &evt
            ))
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif

  // }}}

  // {{{ image

  class image : public memory_object
  {
    public:
      image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
        : memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
      { }

      py::object get_image_info(cl_image_info param_name) const
      {
        switch (param_name)
        {
          case CL_IMAGE_FORMAT:
            PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name,
                cl_image_format);
          case CL_IMAGE_ELEMENT_SIZE:
          case CL_IMAGE_ROW_PITCH:
          case CL_IMAGE_SLICE_PITCH:
          case CL_IMAGE_WIDTH:
          case CL_IMAGE_HEIGHT:
          case CL_IMAGE_DEPTH:
#if PYOPENCL_CL_VERSION >= 0x1020
          case CL_IMAGE_ARRAY_SIZE:
#endif
            PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, size_t);

#if PYOPENCL_CL_VERSION >= 0x1020
          case CL_IMAGE_BUFFER:
            {
              cl_mem param_value;
              PYOPENCL_CALL_GUARDED(clGetImageInfo, \
                  (data(), param_name, sizeof(param_value), &param_value, 0));
              if (param_value == 0)
              {
                // no associated memory object? no problem.
                return py::none();
              }

              return create_mem_object_wrapper(param_value);
            }

          case CL_IMAGE_NUM_MIP_LEVELS:
          case CL_IMAGE_NUM_SAMPLES:
            PYOPENCL_GET_INTEGRAL_INFO(Image, data(), param_name, cl_uint);
#endif

          default:
            throw error("MemoryObject.get_image_info", CL_INVALID_VALUE);
        }
      }
  };




  // {{{ image formats

  inline
  cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp)
  {
    std::unique_ptr<cl_image_format> result(new cl_image_format);
    result->image_channel_order = ord;
    result->image_channel_data_type = tp;
    return result.release();
  }

  inline
  py::list get_supported_image_formats(
      context const &ctx,
      cl_mem_flags flags,
      cl_mem_object_type image_type)
  {
    cl_uint num_image_formats;
    PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, (
          ctx.data(), flags, image_type,
          0, NULL, &num_image_formats));

    std::vector<cl_image_format> formats(num_image_formats);
    PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, (
          ctx.data(), flags, image_type,
          formats.size(), formats.empty( ) ? NULL : &formats.front(), NULL));

    PYOPENCL_RETURN_VECTOR(cl_image_format, formats);
  }

  inline
  cl_uint get_image_format_channel_count(cl_image_format const &fmt)
  {
    switch (fmt.image_channel_order)
    {
      case CL_R: return 1;
      case CL_A: return 1;
      case CL_RG: return 2;
      case CL_RA: return 2;
      case CL_RGB: return 3;
      case CL_RGBA: return 4;
      case CL_BGRA: return 4;
      case CL_INTENSITY: return 1;
      case CL_LUMINANCE: return 1;
      default:
        throw pyopencl::error("ImageFormat.channel_dtype_size",
            CL_INVALID_VALUE,
            "unrecognized channel order");
    }
  }

  inline
  cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt)
  {
    switch (fmt.image_channel_data_type)
    {
      case CL_SNORM_INT8: return 1;
      case CL_SNORM_INT16: return 2;
      case CL_UNORM_INT8: return 1;
      case CL_UNORM_INT16: return 2;
      case CL_UNORM_SHORT_565: return 2;
      case CL_UNORM_SHORT_555: return 2;
      case CL_UNORM_INT_101010: return 4;
      case CL_SIGNED_INT8: return 1;
      case CL_SIGNED_INT16: return 2;
      case CL_SIGNED_INT32: return 4;
      case CL_UNSIGNED_INT8: return 1;
      case CL_UNSIGNED_INT16: return 2;
      case CL_UNSIGNED_INT32: return 4;
      case CL_HALF_FLOAT: return 2;
      case CL_FLOAT: return 4;
      default:
        throw pyopencl::error("ImageFormat.channel_dtype_size",
            CL_INVALID_VALUE,
            "unrecognized channel data type");
    }
  }

  inline
  cl_uint get_image_format_item_size(cl_image_format const &fmt)
  {
    return get_image_format_channel_count(fmt)
      * get_image_format_channel_dtype_size(fmt);
  }

  // }}}

  // {{{ image creation

  inline
  image *create_image(
      context const &ctx,
      cl_mem_flags flags,
      cl_image_format const &fmt,
      py::sequence shape,
      py::sequence pitches,
      py::object buffer)
  {
    if (shape.ptr() == Py_None)
      throw pyopencl::error("Image", CL_INVALID_VALUE,
          "'shape' must be given");

    void *buf = 0;
    PYOPENCL_BUFFER_SIZE_T len = 0;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> retained_buf_obj;
    if (buffer.ptr() != Py_None)
    {
      retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper);

      int py_buf_flags = PyBUF_ANY_CONTIGUOUS;
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
        py_buf_flags |= PyBUF_WRITABLE;

      retained_buf_obj->get(buffer.ptr(), py_buf_flags);

      buf = retained_buf_obj->m_buf.buf;
      len = retained_buf_obj->m_buf.len;
    }
#else
    py::object retained_buf_obj;
    if (buffer.ptr() != Py_None)
    {
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
      {
        if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
          throw py::error_already_set();
      }
      else
      {
        if (PyObject_AsReadBuffer(
              buffer.ptr(), const_cast<const void **>(&buf), &len))
          throw py::error_already_set();
      }

      if (flags & CL_MEM_USE_HOST_PTR)
        retained_buf_obj = buffer;
    }
#endif

    unsigned dims = py::len(shape);
    cl_int status_code;
    cl_mem mem;
    if (dims == 2)
    {
      size_t width = (shape[0]).cast<size_t>();
      size_t height = (shape[1]).cast<size_t>();

      size_t pitch = 0;
      if (pitches.ptr() != Py_None)
      {
        if (py::len(pitches) != 1)
          throw pyopencl::error("Image", CL_INVALID_VALUE,
              "invalid length of pitch tuple");
        pitch = (pitches[0]).cast<size_t>();
      }

      // check buffer size
      cl_int itemsize = get_image_format_item_size(fmt);
      if (buf && std::max(pitch, width*itemsize)*height > cl_uint(len))
          throw pyopencl::error("Image", CL_INVALID_VALUE,
              "buffer too small");

      PYOPENCL_PRINT_CALL_TRACE("clCreateImage2D");
      PYOPENCL_RETRY_IF_MEM_ERROR(
          {
            mem = clCreateImage2D(ctx.data(), flags, &fmt,
                width, height, pitch, buf, &status_code);
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("clCreateImage2D", status_code);
          } );

    }
    else if (dims == 3)
    {
      size_t width = (shape[0]).cast<size_t>();
      size_t height = (shape[1]).cast<size_t>();
      size_t depth = (shape[2]).cast<size_t>();

      size_t pitch_x = 0;
      size_t pitch_y = 0;

      if (pitches.ptr() != Py_None)
      {
        if (py::len(pitches) != 2)
          throw pyopencl::error("Image", CL_INVALID_VALUE,
              "invalid length of pitch tuple");

        pitch_x = (pitches[0]).cast<size_t>();
        pitch_y = (pitches[1]).cast<size_t>();
      }

      // check buffer size
      cl_int itemsize = get_image_format_item_size(fmt);
      if (buf &&
          std::max(std::max(pitch_x, width*itemsize)*height, pitch_y)
          * depth > cl_uint(len))
        throw pyopencl::error("Image", CL_INVALID_VALUE,
            "buffer too small");

      PYOPENCL_PRINT_CALL_TRACE("clCreateImage3D");
      PYOPENCL_RETRY_IF_MEM_ERROR(
          {
            mem = clCreateImage3D(ctx.data(), flags, &fmt,
              width, height, depth, pitch_x, pitch_y, buf, &status_code);
            if (status_code != CL_SUCCESS)
              throw pyopencl::error("clCreateImage3D", status_code);
          } );
    }
    else
      throw pyopencl::error("Image", CL_INVALID_VALUE,
          "invalid dimension");

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    if (!(flags & CL_MEM_USE_HOST_PTR))
      retained_buf_obj.reset();
#endif

    try
    {
      return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj));
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
      throw;
    }
  }

#if PYOPENCL_CL_VERSION >= 0x1020

  inline
  image *create_image_from_desc(
      context const &ctx,
      cl_mem_flags flags,
      cl_image_format const &fmt,
      cl_image_desc &desc,
      py::object buffer)
  {
    if (buffer.ptr() != Py_None &&
        !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
      PyErr_Warn(PyExc_UserWarning, "'hostbuf' was passed, "
          "but no memory flags to make use of it.");

    void *buf = 0;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> retained_buf_obj;
    if (buffer.ptr() != Py_None)
    {
      retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper);

      int py_buf_flags = PyBUF_ANY_CONTIGUOUS;
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
        py_buf_flags |= PyBUF_WRITABLE;

      retained_buf_obj->get(buffer.ptr(), py_buf_flags);

      buf = retained_buf_obj->m_buf.buf;
    }
#else
    py::object retained_buf_obj;
    PYOPENCL_BUFFER_SIZE_T len;
    if (buffer.ptr() != Py_None)
    {
      if ((flags & CL_MEM_USE_HOST_PTR)
          && ((flags & CL_MEM_READ_WRITE)
            || (flags & CL_MEM_WRITE_ONLY)))
      {
        if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
          throw py::error_already_set();
      }
      else
      {
        if (PyObject_AsReadBuffer(
              buffer.ptr(), const_cast<const void **>(&buf), &len))
          throw py::error_already_set();
      }

      if (flags & CL_MEM_USE_HOST_PTR)
        retained_buf_obj = buffer;
    }
#endif

    PYOPENCL_PRINT_CALL_TRACE("clCreateImage");
    cl_int status_code;
    cl_mem mem = clCreateImage(ctx.data(), flags, &fmt, &desc, buf, &status_code);
    if (status_code != CL_SUCCESS)
      throw pyopencl::error("clCreateImage", status_code);

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    if (!(flags & CL_MEM_USE_HOST_PTR))
      retained_buf_obj.reset();
#endif

    try
    {
      return new image(mem, false, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(retained_buf_obj));
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
      throw;
    }
  }

#endif

  // }}}

  // {{{ image transfers

  inline
  event *enqueue_read_image(
      command_queue &cq,
      image &img,
      py::object py_origin, py::object py_region,
      py::object buffer,
      size_t row_pitch, size_t slice_pitch,
      py::object py_wait_for,
      bool is_blocking)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;
    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;

    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueReadImage, (
            cq.data(),
            img.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            origin, region, row_pitch, slice_pitch, buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_write_image(
      command_queue &cq,
      image &img,
      py::object py_origin, py::object py_region,
      py::object buffer,
      size_t row_pitch, size_t slice_pitch,
      py::object py_wait_for,
      bool is_blocking)
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    const void *buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

    buf = ward->m_buf.buf;
#else
    py::object ward = buffer;
    PYOPENCL_BUFFER_SIZE_T len;
    if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueWriteImage, (
            cq.data(),
            img.data(),
            PYOPENCL_CAST_BOOL(is_blocking),
            origin, region, row_pitch, slice_pitch, buf,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
  }




  inline
  event *enqueue_copy_image(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dest,
      py::object py_src_origin,
      py::object py_dest_origin,
      py::object py_region,
      py::object py_wait_for
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(src_origin);
    COPY_PY_COORD_TRIPLE(dest_origin);
    COPY_PY_REGION_TRIPLE(region);

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyImage, (
            cq.data(), src.data(), dest.data(),
            src_origin, dest_origin, region,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }




  inline
  event *enqueue_copy_image_to_buffer(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dest,
      py::object py_origin,
      py::object py_region,
      size_t offset,
      py::object py_wait_for
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyImageToBuffer, (
            cq.data(), src.data(), dest.data(),
            origin, region, offset,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }




  inline
  event *enqueue_copy_buffer_to_image(
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dest,
      size_t offset,
      py::object py_origin,
      py::object py_region,
      py::object py_wait_for
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferToImage, (
            cq.data(), src.data(), dest.data(),
            offset, origin, region,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }

  // }}}

#if PYOPENCL_CL_VERSION >= 0x1020
  inline
  event *enqueue_fill_image(
      command_queue &cq,
      memory_object_holder &mem,
      py::object color,
      py::object py_origin, py::object py_region,
      py::object py_wait_for
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;

    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    const void *color_buf;

#ifdef PYOPENCL_USE_NEW_BUFFER_INTERFACE
    std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);

    ward->get(color.ptr(), PyBUF_ANY_CONTIGUOUS);

    color_buf = ward->m_buf.buf;
#else
    PYOPENCL_BUFFER_SIZE_T color_len;
    if (PyObject_AsReadBuffer(color.ptr(), &color_buf, &color_len))
      throw py::error_already_set();
#endif

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(clEnqueueFillImage, (
            cq.data(),
            mem.data(),
            color_buf, origin, region,
            PYOPENCL_WAITLIST_ARGS, &evt
            ));
      );
    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif

  // }}}

  // {{{ maps
  class memory_map
  {
    private:
      bool m_valid;
      std::shared_ptr<command_queue> m_queue;
      memory_object m_mem;
      void *m_ptr;

    public:
      memory_map(std::shared_ptr<command_queue> cq, memory_object const &mem, void *ptr)
        : m_valid(true), m_queue(cq), m_mem(mem), m_ptr(ptr)
      {
      }

      ~memory_map()
      {
        if (m_valid)
          delete release(0, py::none());
      }

      event *release(command_queue *cq, py::object py_wait_for)
      {
        PYOPENCL_PARSE_WAIT_FOR;

        if (cq == 0)
          cq = m_queue.get();

        cl_event evt;
        PYOPENCL_CALL_GUARDED(clEnqueueUnmapMemObject, (
              cq->data(), m_mem.data(), m_ptr,
              PYOPENCL_WAITLIST_ARGS, &evt
              ));

        m_valid = false;

        PYOPENCL_RETURN_NEW_EVENT(evt);
      }
  };




  inline
  py::object enqueue_map_buffer(
      std::shared_ptr<command_queue> cq,
      memory_object_holder &buf,
      cl_map_flags flags,
      size_t offset,
      py::object py_shape, py::object dtype,
      py::object py_order, py::object py_strides,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    PYOPENCL_PARSE_NUMPY_ARRAY_SPEC;

    npy_uintp size_in_bytes = tp_descr->elsize;
    for (npy_intp sdim: shape)
      size_in_bytes *= sdim;

    py::object result;

    cl_event evt;
    cl_int status_code;
    PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapBuffer");
    void *mapped;

    PYOPENCL_RETRY_IF_MEM_ERROR(
        {
          {
            py::gil_scoped_release release;
            mapped = clEnqueueMapBuffer(
                  cq->data(), buf.data(),
                  PYOPENCL_CAST_BOOL(is_blocking), flags,
                  offset, size_in_bytes,
                  PYOPENCL_WAITLIST_ARGS, &evt,
                  &status_code);
          }
          if (status_code != CL_SUCCESS)
            throw pyopencl::error("clEnqueueMapBuffer", status_code);
        } );

    event evt_handle(evt, false);

    std::unique_ptr<memory_map> map;
      result = py::object(py::reinterpret_steal<py::object>(PyArray_NewFromDescr(
          &PyArray_Type, tp_descr,
          shape.size(),
          shape.empty() ? NULL : &shape.front(),
          strides.empty() ? NULL : &strides.front(),
          mapped, ary_flags, /*obj*/NULL)));
      if (size_in_bytes != (npy_uintp) PyArray_NBYTES(result.ptr()))
        throw pyopencl::error("enqueue_map_buffer", CL_INVALID_VALUE,
            "miscalculated numpy array size (not contiguous?)");

       map = std::unique_ptr<memory_map>(new memory_map(cq, buf, mapped));
    }
    catch (...)
    {
      PYOPENCL_CALL_GUARDED_CLEANUP(clEnqueueUnmapMemObject, (
            cq->data(), buf.data(), mapped, 0, 0, 0));
    py::object map_py(handle_from_new_ptr(map.release()));
    PyArray_BASE(result.ptr()) = map_py.ptr();
    Py_INCREF(map_py.ptr());

    return py::make_tuple(
        result,
        handle_from_new_ptr(new event(evt_handle)));
  }




  inline
  py::object enqueue_map_image(
      std::shared_ptr<command_queue> cq,
      memory_object_holder &img,
      cl_map_flags flags,
      py::object py_origin,
      py::object py_region,
      py::object py_shape, py::object dtype,
      py::object py_order, py::object py_strides,
      py::object py_wait_for,
      bool is_blocking
      )
  {
    PYOPENCL_PARSE_WAIT_FOR;
    PYOPENCL_PARSE_NUMPY_ARRAY_SPEC;
    COPY_PY_COORD_TRIPLE(origin);
    COPY_PY_REGION_TRIPLE(region);

    cl_event evt;
    cl_int status_code;
    PYOPENCL_PRINT_CALL_TRACE("clEnqueueMapImage");
    size_t row_pitch, slice_pitch;
    void *mapped;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      {
        {
          py::gil_scoped_release release;
          mapped = clEnqueueMapImage(
                cq->data(), img.data(),
                PYOPENCL_CAST_BOOL(is_blocking), flags,
                origin, region, &row_pitch, &slice_pitch,
                PYOPENCL_WAITLIST_ARGS, &evt,
                &status_code);
        }
        if (status_code != CL_SUCCESS)
          throw pyopencl::error("clEnqueueMapImage", status_code);