diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst index 238c7268c7e038489262dd941ffd37464ea04613..1bee111511a34042d3fc4bacc48d0757ade22626 100644 --- a/doc/source/runtime.rst +++ b/doc/source/runtime.rst @@ -224,6 +224,16 @@ Command Queues and Events .. versionadded:: 0.91.5 +.. class:: UserEvent(context) + + A subclass of :class:`Event`. Only available with OpenCL 1.1 and newer. + + .. versionadded:: 0.92 + + .. method:: set_status(status) + + See :class:`command_execution_status` for possible values of *status*. + Memory ------ @@ -290,6 +300,46 @@ Buffers .. versionadded:: 0.91.5 +.. function:: enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, host_origin, region, buffer_pitches=None, host_pitches=None, wait_for=None, is_blocking=False) + + The *origin* and *region* parameters are :class:`tuple` instances of length + three or shorter. The *pitches* parameters are :class:`tuple` instances of + length two or shorter, which may be zero to indicate 'tight packing'. + + |std-enqueue-blurb| + + *hostbuf* |buf-iface| + + Only available in OpenCL 1.1 and newer. + + .. versionadded:: 0.92 + +.. function:: enqueue_write_buffer_rect(queue, mem, hostbuf, buffer_origin, host_origin, region, buffer_pitches=None, host_pitches=None, wait_for=None, is_blocking=False) + + The *origin* and *region* parameters are :class:`tuple` instances of length + three or shorter. The *pitches* parameters are :class:`tuple` instances of + length two or shorter, which may be zero to indicate 'tight packing'. + + |std-enqueue-blurb| + + *hostbuf* |buf-iface| + + Only available in OpenCL 1.1 and newer. + + .. versionadded:: 0.92 + +.. function:: enqueue_copy_buffer_rect(queue, src, dst, src_origin, dst_origin, region, src_pitches=None, dst_pitches=None, wait_for=None) + + The *origin* and *region* parameters are :class:`tuple` instances of length + three or shorter. 
The *pitches* parameters are :class:`tuple` instances of + length two or shorter, which may be zero to indicate 'tight packing'. + + |std-enqueue-blurb| + + Only available in OpenCL 1.1 and newer. + + .. versionadded:: 0.92 + Image Formats ^^^^^^^^^^^^^ diff --git a/doc/upload-docs.sh b/doc/upload-docs.sh index cd5e34f182e0eafe105a1a96f1634d74d4032abf..1c29e30a18fc489caeeb5ba60a52a9982f2ecc9f 100755 --- a/doc/upload-docs.sh +++ b/doc/upload-docs.sh @@ -1,4 +1,4 @@ #! /bin/sh -cp build/html/runtime.rst build/html/reference.rst +cp build/html/runtime.html build/html/reference.html rsync --progress --verbose --archive --delete build/html/* buster:doc/pyopencl diff --git a/src/wrapper/wrap_cl.cpp b/src/wrapper/wrap_cl.cpp index 66d2984a8502e4c799d327dee3723d6601464f56..417a383eb5a817c85ad9309abf08da8784608cc9 100644 --- a/src/wrapper/wrap_cl.cpp +++ b/src/wrapper/wrap_cl.cpp @@ -565,9 +565,22 @@ BOOST_PYTHON_MODULE(_cl) py::return_value_policy<py::manage_new_object>()); DEF_SIMPLE_FUNCTION(enqueue_wait_for_events); DEF_SIMPLE_FUNCTION(enqueue_barrier); + +#ifdef CL_VERSION_1_1 + { + typedef user_event cls; + py::class_<cls, py::bases<event>, boost::noncopyable>("UserEvent", py::no_init) + .def("__init__", make_constructor( + create_user_event, py::default_call_policies(), py::args("context"))) + .DEF_SIMPLE_METHOD(set_status) + ; + } +#endif + // }}} // {{{ memory_object + { typedef memory_object cls; py::class_<cls, boost::noncopyable>("MemoryObject", py::no_init) @@ -580,6 +593,9 @@ BOOST_PYTHON_MODULE(_cl) ; } + // }}} + + // {{{ buffer { typedef buffer cls; py::class_<cls, py::bases<memory_object>, boost::noncopyable>( @@ -600,6 +616,9 @@ BOOST_PYTHON_MODULE(_cl) ; } + // {{{ transfers + + // {{{ byte-for-byte py::def("enqueue_read_buffer", enqueue_read_buffer, (py::args("queue", "mem", "hostbuf"), py::arg("device_offset")=0, @@ -624,6 +643,44 @@ BOOST_PYTHON_MODULE(_cl) py::arg("wait_for")=py::object() ), py::return_value_policy<py::manage_new_object>()); + + // 
}}} + + // {{{ rectangular + +#ifdef CL_VERSION_1_1 + py::def("enqueue_read_buffer_rect", enqueue_read_buffer_rect, + (py::args("queue", "mem", "hostbuf", + "buffer_origin", "host_origin", "region"), + py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=false + ), + py::return_value_policy<py::manage_new_object>()); + py::def("enqueue_write_buffer_rect", enqueue_write_buffer_rect, + (py::args("queue", "mem", "hostbuf", + "buffer_origin", "host_origin", "region"), + py::arg("buffer_pitches")=py::object(), + py::arg("host_pitches")=py::object(), + py::arg("wait_for")=py::object(), + py::arg("is_blocking")=false + ), + py::return_value_policy<py::manage_new_object>()); + py::def("enqueue_copy_buffer_rect", enqueue_copy_buffer_rect, + (py::args("queue", "src", "dst", + "src_origin", "dst_origin", "region"), + py::arg("src_pitches")=py::object(), + py::arg("dst_pitches")=py::object(), + py::arg("wait_for")=py::object() + ), + py::return_value_policy<py::manage_new_object>()); +#endif + + // }}} + + // }}} + // }}} // {{{ image diff --git a/src/wrapper/wrap_cl.hpp b/src/wrapper/wrap_cl.hpp index ea4c0cccf072e80a03d6f81d2e5a679a5944cb22..35e85e265d62140b5c81f8858f96941a3b882453 100644 --- a/src/wrapper/wrap_cl.hpp +++ b/src/wrapper/wrap_cl.hpp @@ -29,6 +29,7 @@ #include <vector> #include <utility> #include <numeric> +#include <boost/python/slice.hpp> #include <boost/foreach.hpp> #include <boost/scoped_array.hpp> #include "wrap_helpers.hpp" @@ -329,6 +330,7 @@ namespace pyopencl // }}} + inline py::tuple get_cl_header_version() { #if defined(CL_VERSION_1_1) @@ -389,6 +391,7 @@ namespace pyopencl + inline py::list get_platforms() { cl_uint num_platforms = 0; @@ -660,6 +663,7 @@ namespace pyopencl + inline std::vector<cl_context_properties> parse_context_properties( py::object py_properties) { @@ -701,6 +705,7 @@ namespace pyopencl + inline context *create_context(py::object py_devices, 
py::object py_properties, py::object py_dev_type) { @@ -949,6 +954,7 @@ namespace pyopencl + inline void wait_for_events(py::object events) { cl_uint num_events_in_wait_list = 0; @@ -965,6 +971,7 @@ namespace pyopencl + inline event *enqueue_marker(command_queue &cq) { cl_event evt; @@ -978,6 +985,7 @@ namespace pyopencl + inline void enqueue_wait_for_events(command_queue &cq, py::object py_events) { cl_uint num_events = 0; @@ -994,14 +1002,59 @@ namespace pyopencl + inline void enqueue_barrier(command_queue &cq) { PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data())); } + + + +#ifdef CL_VERSION_1_1 + class user_event : public event + { + public: + user_event(cl_event evt, bool retain) + : event(evt, retain) + { } + + void set_status(cl_int execution_status) + { + PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status)); + } + }; + + + + + inline + event *create_user_event(context &ctx) + { + cl_int status_code; + cl_event evt = clCreateUserEvent(ctx.data(), &status_code); + PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent"); + + if (status_code != CL_SUCCESS) + throw pyopencl::error("UserEvent", status_code); + + try + { + return new user_event(evt, false); + } + catch (...) 
+ { + clReleaseEvent(evt); + throw; + } + } + +#endif + // }}} - // {{{ memory objects + // {{{ memory_object + class memory_object : boost::noncopyable { private: @@ -1065,8 +1118,9 @@ namespace pyopencl py::object get_info(cl_mem_info param_name) const; }; + // }}} - + // {{{ buffer class buffer : public memory_object { @@ -1099,9 +1153,9 @@ namespace pyopencl } } - buffer *getitem(py::object slc) const + buffer *getitem(py::slice slc) const { - Py_ssize_t start, end, stride, length; + PYOPENCL_BUFFER_SIZE_T start, end, stride, length; size_t my_length; PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, @@ -1124,9 +1178,9 @@ namespace pyopencl #endif }; + // {{{ buffer creation - - + inline buffer *create_buffer( context &ctx, cl_mem_flags flags, @@ -1184,9 +1238,13 @@ namespace pyopencl } } + // }}} + // {{{ buffer transfers + // {{{ byte-for-byte transfers + inline event *enqueue_read_buffer( command_queue &cq, memory_object &mem, @@ -1225,6 +1283,7 @@ namespace pyopencl + inline event *enqueue_write_buffer( command_queue &cq, memory_object &mem, @@ -1263,6 +1322,7 @@ namespace pyopencl + inline event *enqueue_copy_buffer( command_queue &cq, memory_object &src, @@ -1296,7 +1356,140 @@ namespace pyopencl // }}} + // {{{ rectangular transfers +#ifdef CL_VERSION_1_1 + inline + event *enqueue_read_buffer_rect( + command_queue &cq, + memory_object &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::object py_buffer_pitches, + py::object py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + void *buf; + PYOPENCL_BUFFER_SIZE_T len; + + if (PyObject_AsWriteBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + + cl_event evt; + 
PYOPENCL_CALL_GUARDED(clEnqueueReadBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, + buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + num_events_in_wait_list, event_wait_list.empty( ) ? NULL : &event_wait_list.front(), &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_write_buffer_rect( + command_queue &cq, + memory_object &mem, + py::object buffer, + py::object py_buffer_origin, + py::object py_host_origin, + py::object py_region, + py::object py_buffer_pitches, + py::object py_host_pitches, + py::object py_wait_for, + bool is_blocking + ) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(buffer_origin); + COPY_PY_COORD_TRIPLE(host_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(buffer_pitches); + COPY_PY_PITCH_TUPLE(host_pitches); + + const void *buf; + PYOPENCL_BUFFER_SIZE_T len; + + if (PyObject_AsReadBuffer(buffer.ptr(), &buf, &len)) + throw py::error_already_set(); + + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueWriteBufferRect, ( + cq.data(), + mem.data(), + PYOPENCL_CAST_BOOL(is_blocking), + buffer_origin, host_origin, region, + buffer_pitches[0], buffer_pitches[1], + host_pitches[0], host_pitches[1], + buf, + num_events_in_wait_list, event_wait_list.empty( ) ? 
NULL : &event_wait_list.front(), &evt + )); + PYOPENCL_RETURN_NEW_EVENT(evt); + } + + + + + inline + event *enqueue_copy_buffer_rect( + command_queue &cq, + memory_object &src, + memory_object &dst, + py::object py_src_origin, + py::object py_dst_origin, + py::object py_region, + py::object py_src_pitches, + py::object py_dst_pitches, + py::object py_wait_for) + { + PYOPENCL_PARSE_WAIT_FOR; + COPY_PY_COORD_TRIPLE(src_origin); + COPY_PY_COORD_TRIPLE(dst_origin); + COPY_PY_REGION_TRIPLE(region); + COPY_PY_PITCH_TUPLE(src_pitches); + COPY_PY_PITCH_TUPLE(dst_pitches); + + cl_event evt; + PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, ( + cq.data(), + src.data(), dst.data(), + src_origin, dst_origin, region, + src_pitches[0], src_pitches[1], + dst_pitches[0], dst_pitches[1], + num_events_in_wait_list, + event_wait_list.empty( ) ? NULL : &event_wait_list.front(), + &evt + )); + + PYOPENCL_RETURN_NEW_EVENT(evt); + } + +#endif + + // }}} + + // }}} + + // }}} + // {{{ image + class image : public memory_object { public: @@ -1328,6 +1521,9 @@ namespace pyopencl + // {{{ image formats + + inline cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp) { std::auto_ptr<cl_image_format> result(new cl_image_format); @@ -1356,6 +1552,7 @@ namespace pyopencl PYOPENCL_RETURN_VECTOR(cl_image_format, formats); } + inline cl_uint get_image_format_channel_count(cl_image_format const &fmt) { switch (fmt.image_channel_order) @@ -1376,6 +1573,7 @@ namespace pyopencl } } + inline cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt) { switch (fmt.image_channel_data_type) @@ -1402,16 +1600,19 @@ namespace pyopencl } } + inline cl_uint get_image_format_item_size(cl_image_format const &fmt) { return get_image_format_channel_count(fmt) * get_image_format_channel_dtype_size(fmt); } + // }}} + // {{{ image creation - - inline image *create_image( + inline + image *create_image( context const &ctx, cl_mem_flags flags, cl_image_format const &fmt, @@ -1543,9 
+1744,11 @@ namespace pyopencl } } + // }}} + // {{{ image transfers - + inline event *enqueue_read_image( command_queue &cq, image &img, @@ -1587,6 +1790,7 @@ namespace pyopencl + inline event *enqueue_write_image( command_queue &cq, image &img, @@ -1628,6 +1832,7 @@ namespace pyopencl + inline event *enqueue_copy_image( command_queue &cq, memory_object &src, @@ -1655,6 +1860,7 @@ namespace pyopencl + inline event *enqueue_copy_image_to_buffer( command_queue &cq, memory_object &src, @@ -1681,6 +1887,7 @@ namespace pyopencl + inline event *enqueue_copy_buffer_to_image( command_queue &cq, memory_object &src, @@ -1706,6 +1913,8 @@ namespace pyopencl // }}} + // }}} + // {{{ maps class memory_map { @@ -1749,6 +1958,7 @@ namespace pyopencl + inline py::object enqueue_map_buffer( command_queue &cq, memory_object &buf, @@ -1807,6 +2017,7 @@ namespace pyopencl + inline py::object enqueue_map_image( command_queue &cq, memory_object &img, @@ -1934,6 +2145,7 @@ namespace pyopencl // }}} // {{{ program + class program : boost::noncopyable { private: @@ -2080,7 +2292,8 @@ namespace pyopencl - inline program *create_program_with_source( + inline + program *create_program_with_source( context &ctx, std::string const &src) { @@ -2109,7 +2322,8 @@ namespace pyopencl - inline program *create_program_with_binary( + inline + program *create_program_with_binary( context &ctx, py::object py_devices, py::object py_binaries) @@ -2159,7 +2373,7 @@ namespace pyopencl } - + inline void unload_compiler() { PYOPENCL_CALL_GUARDED(clUnloadCompiler, ()); @@ -2357,7 +2571,7 @@ namespace pyopencl }; - + inline py::list create_kernels_in_program(program &pgm) { cl_uint num_kernels; @@ -2378,6 +2592,7 @@ namespace pyopencl + inline event *enqueue_nd_range_kernel( command_queue &cq, kernel &knl, @@ -2438,6 +2653,7 @@ namespace pyopencl + inline event *enqueue_task( command_queue &cq, kernel &knl, @@ -2519,6 +2735,7 @@ namespace pyopencl #define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, 
CL_ARGS) \ + inline \ TYPE *NAME ARGS \ { \ cl_int status_code; \ @@ -2579,6 +2796,7 @@ namespace pyopencl + inline py::tuple get_gl_object_info(memory_object const &mem) { cl_gl_object_type otype; @@ -2588,6 +2806,7 @@ namespace pyopencl } #define WRAP_GL_ENQUEUE(what, What) \ + inline \ event *enqueue_##what##_gl_objects( \ command_queue &cq, \ py::object py_mem_objects, \ @@ -2617,6 +2836,7 @@ namespace pyopencl #if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) + inline py::object get_gl_context_info_khr( py::object py_properties, cl_gl_context_info param_name @@ -2749,6 +2969,7 @@ namespace pyopencl } // }}} + } diff --git a/src/wrapper/wrap_helpers.hpp b/src/wrapper/wrap_helpers.hpp index 903cfd3f8724789c80a56bebad57eb43f3b544f5..002f6d41d5c06d4d95b141ed85a523351116bcf7 100644 --- a/src/wrapper/wrap_helpers.hpp +++ b/src/wrapper/wrap_helpers.hpp @@ -69,8 +69,19 @@ namespace py = boost::python; { \ size_t my_len = len(py_##NAME); \ if (my_len > 3) \ - throw error("image copy", CL_INVALID_VALUE, #NAME "has too many components"); \ - for (size_t i = 0; i < std::min(size_t(3), my_len); ++i) \ + throw error("transfer", CL_INVALID_VALUE, #NAME " has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ + NAME[i] = py::extract<size_t>(py_##NAME[i])(); \ + } + +#define COPY_PY_PITCH_TUPLE(NAME) \ + size_t NAME[2] = {0, 0}; \ + if (py_##NAME.ptr() != Py_None) \ + { \ + size_t my_len = len(py_##NAME); \ + if (my_len > 2) \ + throw error("transfer", CL_INVALID_VALUE, #NAME " has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ NAME[i] = py::extract<size_t>(py_##NAME[i])(); \ } @@ -79,8 +90,8 @@ namespace py = boost::python; { \ size_t my_len = len(py_##NAME); \ if (my_len > 3) \ - throw error("image copy", CL_INVALID_VALUE, #NAME "has too many components"); \ - for (size_t i = 0; i < std::min(size_t(3), my_len); ++i) \ + throw error("transfer", CL_INVALID_VALUE, #NAME " has too many components"); \ + for (size_t i = 0; i < my_len; ++i) \ 
NAME[i] = py::extract<size_t>(py_##NAME[i])(); \ }