Newer
Older
return py::reinterpret_borrow<py::object>(m_ward->m_buf.obj);
}
virtual void wait()
{
event::wait();
m_ward.reset();
}
virtual void wait_during_cleanup_without_releasing_the_gil()
{
event::wait_during_cleanup_without_releasing_the_gil();
m_ward.reset();
}
};
// Block until every event yielded by iterating `events` has completed.
//
// Each item is cast to `event &` and its raw `cl_event` handle is collected
// into a contiguous array, which is then handed to clWaitForEvents.
inline
void wait_for_events(py::object events)
{
  // These two locals are not referenced by name below; presumably
  // PYOPENCL_WAITLIST_ARGS expands to them, so they must keep these exact
  // names (macro definition is outside this view -- confirm).
  cl_uint num_events_in_wait_list = 0;
  std::vector<cl_event> event_wait_list(len(events));

  for (py::handle evt: events)
    event_wait_list[num_events_in_wait_list++] = py::cast<event &>(evt).data();

  PYOPENCL_CALL_GUARDED_THREADED(clWaitForEvents, (
        PYOPENCL_WAITLIST_ARGS));
}
#if PYOPENCL_CL_VERSION >= 0x1020
// Enqueue a marker on `cq` through clEnqueueMarkerWithWaitList and return
// the event it produces.
//
// `py_wait_for` is not referenced directly; presumably
// PYOPENCL_PARSE_WAIT_FOR reads it and declares what PYOPENCL_WAITLIST_ARGS
// expands to (macro definitions are outside this view -- confirm).
inline
event *enqueue_marker_with_wait_list(command_queue &cq,
py::object py_wait_for)
{
PYOPENCL_PARSE_WAIT_FOR;
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, (
cq.data(), PYOPENCL_WAITLIST_ARGS, &evt));
PYOPENCL_RETURN_NEW_EVENT(evt);
}
// Enqueue a barrier on `cq` through clEnqueueBarrierWithWaitList and return
// the event it produces.
//
// `py_wait_for` is not referenced directly; presumably
// PYOPENCL_PARSE_WAIT_FOR reads it and declares what PYOPENCL_WAITLIST_ARGS
// expands to (macro definitions are outside this view -- confirm).
inline
event *enqueue_barrier_with_wait_list(command_queue &cq,
py::object py_wait_for)
{
PYOPENCL_PARSE_WAIT_FOR;
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueBarrierWithWaitList,
(cq.data(), PYOPENCL_WAITLIST_ARGS, &evt));
PYOPENCL_RETURN_NEW_EVENT(evt);
}
#endif
// {{{ used internally for pre-OpenCL-1.2 contexts
// Enqueue a marker command on `cq` via clEnqueueMarker (no wait list) and
// pass the resulting `cl_event` to PYOPENCL_RETURN_NEW_EVENT, which supplies
// the function's return statement.
inline
event *enqueue_marker(command_queue &cq)
{
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueMarker, (
cq.data(), &evt));
PYOPENCL_RETURN_NEW_EVENT(evt);
}
// Enqueue, on `cq`, a wait for every event yielded by iterating `py_events`
// (clEnqueueWaitForEvents).
//
// Each item is cast to `event &`; its raw `cl_event` handle is what gets
// passed to OpenCL.
inline
void enqueue_wait_for_events(command_queue &cq, py::object py_events)
{
  // Grow the array while iterating instead of indexing into a pre-sized
  // one, so the count passed to OpenCL always matches what was collected,
  // even if iteration yields a different number of items than len().
  std::vector<cl_event> event_list;
  event_list.reserve(len(py_events));

  for (py::handle py_evt: py_events)
    event_list.push_back(py::cast<event &>(py_evt).data());

  PYOPENCL_CALL_GUARDED(clEnqueueWaitForEvents, (
        cq.data(),
        static_cast<cl_uint>(event_list.size()),
        event_list.empty() ? nullptr : &event_list.front()));
}
// Enqueue a barrier command on `cq` via clEnqueueBarrier. No event is
// produced or returned.
inline
void enqueue_barrier(command_queue &cq)
{
PYOPENCL_CALL_GUARDED(clEnqueueBarrier, (cq.data()));
}
// }}}
#if PYOPENCL_CL_VERSION >= 0x1010
// Event subclass whose execution status can be set from the host.
class user_event : public event
{
public:
// Forwards `evt` and `retain` unchanged to the `event` base constructor.
user_event(cl_event evt, bool retain)
: event(evt, retain)
{ }
// Set this event's execution status via clSetUserEventStatus; the call goes
// through PYOPENCL_CALL_GUARDED.
void set_status(cl_int execution_status)
{
PYOPENCL_CALL_GUARDED(clSetUserEventStatus, (data(), execution_status));
}
};
// Create a user event in context `ctx` (clCreateUserEvent) and wrap it.
//
// Throws pyopencl::error("UserEvent", status) if OpenCL reports a failure.
// The returned wrapper is constructed with retain == false, i.e. no extra
// clRetainEvent is requested for the handle just created.
inline
user_event *create_user_event(context &ctx)
{
  cl_int status_code;

  PYOPENCL_PRINT_CALL_TRACE("clCreateUserEvent");
  cl_event evt = clCreateUserEvent(ctx.data(), &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("UserEvent", status_code);

  try
  {
    return new user_event(evt, false);
  }
  catch (...)
  {
    // Wrapper construction failed: drop the handle so it does not leak.
    clReleaseEvent(evt);
    throw;
  }
}
#endif
// }}}
py::object create_mem_object_wrapper(cl_mem mem, bool retain);
// Abstract base for objects that can hand out a `cl_mem` handle.
class memory_object_holder
{
  public:
    // Raw OpenCL handle of the underlying memory object.
    virtual const cl_mem data() const = 0;

    PYOPENCL_EQUALITY_TESTS(memory_object_holder);

    // Value of CL_MEM_SIZE for this memory object, as reported by
    // clGetMemObjectInfo.
    size_t size() const
    {
      size_t param_value;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (data(), CL_MEM_SIZE, sizeof(param_value), &param_value, 0));
      return param_value;
    }

    // Defined outside this class body.
    py::object get_info(cl_mem_info param_name) const;

    virtual ~memory_object_holder()
    { }
};
class memory_object : noncopyable, public memory_object_holder
typedef std::unique_ptr<py_buffer_wrapper> hostbuf_t;
private:
bool m_valid;
cl_mem m_mem;
hostbuf_t m_hostbuf;
public:
memory_object(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: m_valid(true), m_mem(mem)
{
if (retain)
PYOPENCL_CALL_GUARDED(clRetainMemObject, (mem));
m_hostbuf = std::move(hostbuf);
}
memory_object(memory_object &src)
: m_valid(true), m_mem(src.m_mem)
{
PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem));
}
memory_object(memory_object &&src)
: m_valid(true), m_mem(src.m_mem), m_hostbuf(std::move(src.m_hostbuf))
{ }
memory_object(memory_object_holder const &src)
: m_valid(true), m_mem(src.data())
{
PYOPENCL_CALL_GUARDED(clRetainMemObject, (m_mem));
}
void release()
{
if (!m_valid)
throw error("MemoryObject.free", CL_INVALID_VALUE,
"trying to double-unref mem object");
PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (m_mem));
m_valid = false;
}
~memory_object()
{
if (m_valid)
release();
}
py::object hostbuf()
{
if (m_hostbuf.get())
return py::reinterpret_borrow<py::object>(m_hostbuf->m_buf.obj);
}
const cl_mem data() const
{ return m_mem; }
};
#if PYOPENCL_CL_VERSION >= 0x1020
// Enqueue a migration of the memory objects in `py_mem_objects` on `cq`
// (clEnqueueMigrateMemObjects) with the given migration `flags`, and return
// the resulting event.
//
// Every item of `py_mem_objects` is cast to `const memory_object &`; an
// empty list is passed to OpenCL as a null pointer with count 0.
// `py_wait_for` is not referenced directly; presumably
// PYOPENCL_PARSE_WAIT_FOR consumes it (macro is outside this view --
// confirm).
inline
event *enqueue_migrate_mem_objects(
command_queue &cq,
py::object py_mem_objects,
cl_mem_migration_flags flags,
py::object py_wait_for)
{
PYOPENCL_PARSE_WAIT_FOR;
// Collect the raw cl_mem handles into a contiguous array for the CL call.
std::vector<cl_mem> mem_objects;
for (py::handle mo: py_mem_objects)
mem_objects.push_back(py::cast<const memory_object &>(mo).data());
cl_event evt;
PYOPENCL_RETRY_IF_MEM_ERROR(
PYOPENCL_CALL_GUARDED(clEnqueueMigrateMemObjects, (
cq.data(),
mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(),
flags,
PYOPENCL_WAITLIST_ARGS, &evt
));
);
PYOPENCL_RETURN_NEW_EVENT(evt);
}
#endif
// }}}
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
// {{{ buffer
// Thin wrapper over clCreateBuffer: returns the new `cl_mem` on success and
// throws pyopencl::error("create_buffer", status) otherwise.
inline cl_mem create_buffer(
    cl_context ctx,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr)
{
  PYOPENCL_PRINT_CALL_TRACE("clCreateBuffer");

  cl_int err;
  cl_mem const result = clCreateBuffer(ctx, flags, size, host_ptr, &err);

  if (err == CL_SUCCESS)
    return result;

  throw pyopencl::error("create_buffer", err);
}
// Same arguments and result as create_buffer(), with the call wrapped in
// PYOPENCL_RETRY_RETURN_IF_MEM_ERROR. That macro is defined outside this
// view; presumably it retries the enclosed statement after a memory-related
// failure -- confirm against its definition.
inline cl_mem create_buffer_gc(
cl_context ctx,
cl_mem_flags flags,
size_t size,
void *host_ptr)
{
PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
return create_buffer(ctx, flags, size, host_ptr);
);
}
#if PYOPENCL_CL_VERSION >= 0x1010
// Thin wrapper over clCreateSubBuffer: returns the new `cl_mem` on success
// and throws pyopencl::error("clCreateSubBuffer", status) otherwise.
inline cl_mem create_sub_buffer(
    cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct,
    const void *buffer_create_info)
{
  PYOPENCL_PRINT_CALL_TRACE("clCreateSubBuffer");

  cl_int err;
  cl_mem const result = clCreateSubBuffer(
      buffer, flags, bct, buffer_create_info, &err);

  if (err == CL_SUCCESS)
    return result;

  throw pyopencl::error("clCreateSubBuffer", err);
}
// Same arguments and result as create_sub_buffer(), with the call wrapped
// in PYOPENCL_RETRY_RETURN_IF_MEM_ERROR. That macro is defined outside this
// view; presumably it retries the enclosed statement after a memory-related
// failure -- confirm against its definition.
inline cl_mem create_sub_buffer_gc(
cl_mem buffer, cl_mem_flags flags, cl_buffer_create_type bct,
const void *buffer_create_info)
{
PYOPENCL_RETRY_RETURN_IF_MEM_ERROR(
return create_sub_buffer(buffer, flags, bct, buffer_create_info);
);
}
#endif
// memory_object specialisation for OpenCL buffer objects.
class buffer : public memory_object
{
  public:
    // Arguments are forwarded unchanged to memory_object.
    buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
      : memory_object(mem, retain, std::move(hostbuf))
    { }

#if PYOPENCL_CL_VERSION >= 0x1010
    // Create a sub-buffer covering `size` bytes starting at byte `origin`
    // of this buffer (CL_BUFFER_CREATE_TYPE_REGION), with the given `flags`.
    // Returns a newly allocated wrapper for the sub-buffer.
    buffer *get_sub_region(
        size_t origin, size_t size, cl_mem_flags flags) const
    {
      cl_buffer_region region = { origin, size};

      cl_mem mem = create_sub_buffer_gc(
          data(), flags, CL_BUFFER_CREATE_TYPE_REGION, &region);

      try
      {
        return new buffer(mem, false);
      }
      catch (...)
      {
        // Wrapper construction failed: give the new handle back.
        PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
        throw;
      }
    }

    // Implements slicing: resolve `slc` against this buffer's CL_MEM_SIZE
    // and return the matching sub-buffer.
    //
    // Throws pyopencl::error(CL_INVALID_VALUE) for a stride other than 1 or
    // for an empty/reversed range, and py::error_already_set if the slice
    // itself cannot be resolved.
    buffer *getitem(py::slice slc) const
    {
      PYOPENCL_BUFFER_SIZE_T start, end, stride, length;

      size_t my_length;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (data(), CL_MEM_SIZE, sizeof(my_length), &my_length, 0));

#if PY_VERSION_HEX >= 0x03020000
      if (PySlice_GetIndicesEx(slc.ptr(),
            my_length, &start, &end, &stride, &length) != 0)
        throw py::error_already_set();
#else
      if (PySlice_GetIndicesEx(reinterpret_cast<PySliceObject *>(slc.ptr()),
            my_length, &start, &end, &stride, &length) != 0)
        throw py::error_already_set();
#endif

      if (stride != 1)
        throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE,
            "Buffer slice must have stride 1");

      // The sub-buffer inherits the parent's flags, minus
      // CL_MEM_COPY_HOST_PTR, which is cleared before creating it.
      cl_mem_flags my_flags;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (data(), CL_MEM_FLAGS, sizeof(my_flags), &my_flags, 0));

      my_flags &= ~CL_MEM_COPY_HOST_PTR;

      if (end <= start)
        throw pyopencl::error("Buffer.__getitem__", CL_INVALID_VALUE,
            "Buffer slice must have end > start");

      return get_sub_region(start, end-start, my_flags);
    }
#endif
};
// {{{ buffer creation
// Create an OpenCL buffer in `ctx`, optionally backed by or initialised
// from the Python buffer object `py_hostbuf`.
//
// - `size == 0` with a host buffer means "use the host buffer's length".
// - A `size` larger than the host buffer raises
//   pyopencl::error(CL_INVALID_VALUE).
// - The host buffer is kept by the returned object only when
//   CL_MEM_USE_HOST_PTR is set; otherwise it is dropped once the buffer
//   has been created.
inline
buffer *create_buffer_py(
    context &ctx,
    cl_mem_flags flags,
    size_t size,
    py::object py_hostbuf
    )
{
  if (py_hostbuf.ptr() != Py_None &&
      !(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
  {
    // The warning machinery can turn a warning into an exception (e.g. a
    // warnings filter set to "error"); in that case a Python error is
    // pending and must be propagated rather than silently left set.
    if (PyErr_WarnEx(PyExc_UserWarning, "'hostbuf' was passed, "
          "but no memory flags to make use of it.", 1) < 0)
      throw py::error_already_set();
  }

  void *buf = 0;

  std::unique_ptr<py_buffer_wrapper> retained_buf_obj;
  if (py_hostbuf.ptr() != Py_None)
  {
    retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper);

    // Only ask for a writable view when the device may write through the
    // host pointer.
    int py_buf_flags = PyBUF_ANY_CONTIGUOUS;
    if ((flags & CL_MEM_USE_HOST_PTR)
        && ((flags & CL_MEM_READ_WRITE)
          || (flags & CL_MEM_WRITE_ONLY)))
      py_buf_flags |= PyBUF_WRITABLE;

    retained_buf_obj->get(py_hostbuf.ptr(), py_buf_flags);

    buf = retained_buf_obj->m_buf.buf;

    if (size > size_t(retained_buf_obj->m_buf.len))
      throw pyopencl::error("Buffer", CL_INVALID_VALUE,
          "specified size is greater than host buffer size");
    if (size == 0)
      size = retained_buf_obj->m_buf.len;
  }

  cl_mem mem = create_buffer_gc(ctx.data(), flags, size, buf);

  if (!(flags & CL_MEM_USE_HOST_PTR))
    retained_buf_obj.reset();

  try
  {
    return new buffer(mem, false, std::move(retained_buf_obj));
  }
  catch (...)
  {
    // Wrapper construction failed: give the new handle back.
    PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem));
    throw;
  }
}
// }}}
// {{{ buffer transfers
// {{{ byte-for-byte transfers
// Enqueue a read of `mem`, starting at byte offset `src_offset`, into the
// writable Python buffer object `buffer` (clEnqueueReadBuffer). The number
// of bytes read is the length of the host buffer.
//
// The returned event is created through PYOPENCL_RETURN_NEW_NANNY_EVENT,
// which also receives `ward`, the holder of the host buffer view.
//
// NOTE(review): `src_offset` is used by the call below but was missing
// from the parameter list as found; it is declared here directly after
// `buffer`. Confirm the position against the Python binding for this
// function.
inline
event *enqueue_read_buffer(
    command_queue &cq,
    memory_object_holder &mem,
    py::object buffer,
    size_t src_offset,
    py::object py_wait_for,
    bool is_blocking)
{
  PYOPENCL_PARSE_WAIT_FOR;

  void *buf;
  PYOPENCL_BUFFER_SIZE_T len;

  // The destination must be contiguous and writable.
  std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);
  ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

  buf = ward->m_buf.buf;
  len = ward->m_buf.len;

  // Raw handle fetched up front; clEnqueueReadBuffer takes the command
  // queue as its first argument.
  cl_command_queue queue = cq.data();

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBuffer, (
          queue,
          mem.data(),
          PYOPENCL_CAST_BOOL(is_blocking),
          src_offset, len, buf,
          PYOPENCL_WAITLIST_ARGS, &evt
          ))
    );
  PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
}
// Enqueue a write of the Python buffer object `buffer` into `mem`, starting
// at byte offset `dst_offset` (clEnqueueWriteBuffer). The number of bytes
// written is the length of the host buffer.
//
// The returned event is created through PYOPENCL_RETURN_NEW_NANNY_EVENT,
// which also receives `ward`, the holder of the host buffer view.
//
// NOTE(review): `dst_offset` is used by the call below but was missing
// from the parameter list as found; it is declared here directly after
// `buffer`. Confirm the position against the Python binding for this
// function.
inline
event *enqueue_write_buffer(
    command_queue &cq,
    memory_object_holder &mem,
    py::object buffer,
    size_t dst_offset,
    py::object py_wait_for,
    bool is_blocking)
{
  PYOPENCL_PARSE_WAIT_FOR;

  const void *buf;
  PYOPENCL_BUFFER_SIZE_T len;

  // The source only needs to be contiguous; it is never written to.
  std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);
  ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

  buf = ward->m_buf.buf;
  len = ward->m_buf.len;

  // Raw handle fetched up front; clEnqueueWriteBuffer takes the command
  // queue as its first argument.
  cl_command_queue queue = cq.data();

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer, (
          queue,
          mem.data(),
          PYOPENCL_CAST_BOOL(is_blocking),
          dst_offset, len, buf,
          PYOPENCL_WAITLIST_ARGS, &evt
          ))
    );
  PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
}
// Enqueue a device-side copy of `byte_count` bytes from `src` (starting at
// `src_offset`) to `dst` (starting at `dst_offset`) via
// clEnqueueCopyBuffer, and return the resulting event.
//
// A negative `byte_count` means "as much as fits": the smaller of the two
// buffers' CL_MEM_SIZE values is used.
inline
event *enqueue_copy_buffer(
    command_queue &cq,
    memory_object_holder &src,
    memory_object_holder &dst,
    ptrdiff_t byte_count,
    size_t src_offset,
    size_t dst_offset,
    py::object py_wait_for)
{
  PYOPENCL_PARSE_WAIT_FOR;

  if (byte_count < 0)
  {
    size_t byte_count_src = 0;
    size_t byte_count_dst = 0;
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (src.data(), CL_MEM_SIZE, sizeof(byte_count_src), &byte_count_src, 0));
    // Query the *destination* here: asking `src` twice would let a copy
    // into a smaller destination overrun it.
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (dst.data(), CL_MEM_SIZE, sizeof(byte_count_dst), &byte_count_dst, 0));
    byte_count = std::min(byte_count_src, byte_count_dst);
  }

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED(clEnqueueCopyBuffer, (
          cq.data(),
          src.data(), dst.data(),
          src_offset, dst_offset,
          byte_count,
          PYOPENCL_WAITLIST_ARGS,
          &evt
          ))
    );

  PYOPENCL_RETURN_NEW_EVENT(evt);
}
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
#ifdef CL_DEVICE_P2P_DEVICES_AMD
// Copy between buffers using the AMD peer-to-peer extension entry point
// "clEnqueueCopyBufferP2PAMD", looked up on platform `plat`.
//
// `py_byte_count` may be None, in which case the smaller of the two
// buffers' CL_MEM_SIZE values is copied. Both offsets passed to the
// extension are 0. Throws pyopencl::error(CL_INVALID_VALUE) when the
// platform does not provide the entry point.
inline
event *enqueue_copy_buffer_p2p_amd(
    platform &plat,
    command_queue &cq,
    memory_object_holder &src,
    memory_object_holder &dst,
    py::object py_byte_count,
    py::object py_wait_for)
{
  PYOPENCL_PARSE_WAIT_FOR;

  ptrdiff_t byte_count = 0;
  if (py_byte_count.ptr() != Py_None)
  {
    // Explicit size supplied by the caller.
    byte_count = py::cast<ptrdiff_t>(py_byte_count);
  }
  else
  {
    // No size given: derive it from the two memory objects.
    size_t byte_count_src = 0;
    size_t byte_count_dst = 0;

    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (src.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_src, 0));
    PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
        (dst.data(), CL_MEM_SIZE, sizeof(byte_count), &byte_count_dst, 0));

    byte_count = std::min(byte_count_src, byte_count_dst);
  }

  // Resolve the extension function at call time.
  clEnqueueCopyBufferP2PAMD_fn fn =
    (clEnqueueCopyBufferP2PAMD_fn) clGetExtensionFunctionAddressForPlatform(
        plat.data(), "clEnqueueCopyBufferP2PAMD");

  if (!fn)
    throw pyopencl::error("clGetExtensionFunctionAddressForPlatform", CL_INVALID_VALUE,
        "clEnqueueCopyBufferP2PAMD is not available");

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED(fn, (
          cq.data(),
          src.data(), dst.data(),
          0, 0,
          byte_count,
          PYOPENCL_WAITLIST_ARGS,
          &evt
          ))
    );

  PYOPENCL_RETURN_NEW_EVENT(evt);
}
#endif
// }}}
// {{{ rectangular transfers
#if PYOPENCL_CL_VERSION >= 0x1010
// Enqueue a rectangular read from `mem` into the writable Python buffer
// object `buffer` (clEnqueueReadBufferRect).
//
// Origins, region and pitches arrive as Python objects and are converted by
// the COPY_PY_* macros, which presumably declare the locals
// `buffer_origin`, `host_origin`, `region`, `buffer_pitches` and
// `host_pitches` used below (macros are outside this view -- confirm).
inline
event *enqueue_read_buffer_rect(
    command_queue &cq,
    memory_object_holder &mem,
    py::object buffer,
    py::object py_buffer_origin,
    py::object py_host_origin,
    py::object py_region,
    py::object py_buffer_pitches,
    py::object py_host_pitches,
    py::object py_wait_for,
    bool is_blocking
    )
{
  PYOPENCL_PARSE_WAIT_FOR;
  COPY_PY_COORD_TRIPLE(buffer_origin);
  COPY_PY_COORD_TRIPLE(host_origin);
  COPY_PY_REGION_TRIPLE(region);
  COPY_PY_PITCH_TUPLE(buffer_pitches);
  COPY_PY_PITCH_TUPLE(host_pitches);

  void *buf;

  // The destination must be contiguous and writable.
  std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);
  ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE);

  buf = ward->m_buf.buf;

  // Raw handle fetched up front; clEnqueueReadBufferRect takes the command
  // queue as its first argument.
  cl_command_queue queue = cq.data();

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED_THREADED(clEnqueueReadBufferRect, (
          queue,
          mem.data(),
          PYOPENCL_CAST_BOOL(is_blocking),
          buffer_origin, host_origin, region,
          buffer_pitches[0], buffer_pitches[1],
          host_pitches[0], host_pitches[1],
          buf,
          PYOPENCL_WAITLIST_ARGS, &evt
          ))
    );
  PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
}
// Enqueue a rectangular write from the Python buffer object `buffer` into
// `mem` (clEnqueueWriteBufferRect).
//
// Origins, region and pitches arrive as Python objects and are converted by
// the COPY_PY_* macros, which presumably declare the locals
// `buffer_origin`, `host_origin`, `region`, `buffer_pitches` and
// `host_pitches` used below (macros are outside this view -- confirm).
inline
event *enqueue_write_buffer_rect(
    command_queue &cq,
    memory_object_holder &mem,
    py::object buffer,
    py::object py_buffer_origin,
    py::object py_host_origin,
    py::object py_region,
    py::object py_buffer_pitches,
    py::object py_host_pitches,
    py::object py_wait_for,
    bool is_blocking
    )
{
  PYOPENCL_PARSE_WAIT_FOR;
  COPY_PY_COORD_TRIPLE(buffer_origin);
  COPY_PY_COORD_TRIPLE(host_origin);
  COPY_PY_REGION_TRIPLE(region);
  COPY_PY_PITCH_TUPLE(buffer_pitches);
  COPY_PY_PITCH_TUPLE(host_pitches);

  const void *buf;

  // The source only needs to be contiguous; it is never written to.
  std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);
  ward->get(buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

  buf = ward->m_buf.buf;

  // Raw handle fetched up front; clEnqueueWriteBufferRect takes the command
  // queue as its first argument.
  cl_command_queue queue = cq.data();

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBufferRect, (
          queue,
          mem.data(),
          PYOPENCL_CAST_BOOL(is_blocking),
          buffer_origin, host_origin, region,
          buffer_pitches[0], buffer_pitches[1],
          host_pitches[0], host_pitches[1],
          buf,
          PYOPENCL_WAITLIST_ARGS, &evt
          ))
    );
  PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, ward);
}
// Enqueue a rectangular device-side copy from `src` to `dst`
// (clEnqueueCopyBufferRect) and return the resulting event.
//
// Origins, region and pitches arrive as Python objects and are converted by
// the COPY_PY_* macros, which presumably declare the locals `src_origin`,
// `dst_origin`, `region`, `src_pitches` and `dst_pitches` used below
// (macros are outside this view -- confirm).
inline
event *enqueue_copy_buffer_rect(
    command_queue &cq,
    memory_object_holder &src,
    memory_object_holder &dst,
    py::object py_src_origin,
    py::object py_dst_origin,
    py::object py_region,
    py::object py_src_pitches,
    py::object py_dst_pitches,
    py::object py_wait_for)
{
  PYOPENCL_PARSE_WAIT_FOR;
  COPY_PY_COORD_TRIPLE(src_origin);
  COPY_PY_COORD_TRIPLE(dst_origin);
  COPY_PY_REGION_TRIPLE(region);
  COPY_PY_PITCH_TUPLE(src_pitches);
  COPY_PY_PITCH_TUPLE(dst_pitches);

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED(clEnqueueCopyBufferRect, (
          cq.data(),
          src.data(), dst.data(),
          src_origin, dst_origin, region,
          src_pitches[0], src_pitches[1],
          dst_pitches[0], dst_pitches[1],
          PYOPENCL_WAITLIST_ARGS,
          &evt
          ))
    );

  PYOPENCL_RETURN_NEW_EVENT(evt);
}
#endif
// }}}
// }}}
#if PYOPENCL_CL_VERSION >= 0x1020
// Enqueue a fill of `size` bytes of `mem`, starting at byte `offset`, with
// the repeated contents of the Python buffer object `pattern`
// (clEnqueueFillBuffer). Returns the resulting event.
//
// The pattern length passed to OpenCL is the byte length of `pattern`.
inline
event *enqueue_fill_buffer(
    command_queue &cq,
    memory_object_holder &mem,
    py::object pattern,
    size_t offset,
    size_t size,
    py::object py_wait_for
    )
{
  PYOPENCL_PARSE_WAIT_FOR;

  const void *pattern_buf;
  PYOPENCL_BUFFER_SIZE_T pattern_len;

  // The pattern only needs to be contiguous; it is never written to.
  std::unique_ptr<py_buffer_wrapper> ward(new py_buffer_wrapper);
  ward->get(pattern.ptr(), PyBUF_ANY_CONTIGUOUS);

  pattern_buf = ward->m_buf.buf;
  pattern_len = ward->m_buf.len;

  cl_event evt;
  PYOPENCL_RETRY_IF_MEM_ERROR(
    PYOPENCL_CALL_GUARDED(clEnqueueFillBuffer, (
          cq.data(),
          mem.data(),
          pattern_buf, pattern_len, offset, size,
          PYOPENCL_WAITLIST_ARGS, &evt
          ))
    );
  PYOPENCL_RETURN_NEW_EVENT(evt);
}
#endif
// }}}
// {{{ image
// memory_object specialisation for OpenCL image objects.
class image : public memory_object
{
  public:
    // Arguments are forwarded unchanged to memory_object.
    image(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
      : memory_object(mem, retain, std::move(hostbuf))
    { }

    // Query clGetImageInfo for `param_name` and return the value as a
    // Python object.
    //
    // Each PYOPENCL_GET_TYPED_INFO use ends its case; presumably the macro
    // returns the converted value (definition is outside this view --
    // confirm). Unknown parameter names raise error(CL_INVALID_VALUE).
    py::object get_image_info(cl_image_info param_name) const
    {
      switch (param_name)
      {
        case CL_IMAGE_FORMAT:
          PYOPENCL_GET_TYPED_INFO(Image, data(), param_name,
              cl_image_format);

        case CL_IMAGE_ELEMENT_SIZE:
        case CL_IMAGE_ROW_PITCH:
        case CL_IMAGE_SLICE_PITCH:
        case CL_IMAGE_WIDTH:
        case CL_IMAGE_HEIGHT:
        case CL_IMAGE_DEPTH:
#if PYOPENCL_CL_VERSION >= 0x1020
        case CL_IMAGE_ARRAY_SIZE:
#endif
          PYOPENCL_GET_TYPED_INFO(Image, data(), param_name, size_t);

#if PYOPENCL_CL_VERSION >= 0x1020
        case CL_IMAGE_BUFFER:
          {
            cl_mem param_value;
            PYOPENCL_CALL_GUARDED(clGetImageInfo,
                (data(), param_name, sizeof(param_value), &param_value, 0));
            if (param_value == 0)
            {
              // no associated memory object? no problem.
              return py::none();
            }

            // Wrap the associated buffer; the wrapper takes its own
            // reference.
            return create_mem_object_wrapper(param_value, /* retain */ true);
          }

        case CL_IMAGE_NUM_MIP_LEVELS:
        case CL_IMAGE_NUM_SAMPLES:
          PYOPENCL_GET_TYPED_INFO(Image, data(), param_name, cl_uint);
#endif

        default:
          throw error("Image.get_image_info", CL_INVALID_VALUE);
      }
    }
};
// {{{ image formats
// Allocate a cl_image_format on the heap with the given channel order and
// channel data type. Ownership of the returned pointer passes to the
// caller.
inline
cl_image_format *make_image_format(cl_channel_order ord, cl_channel_type tp)
{
  cl_image_format *fmt = new cl_image_format;

  // Plain member stores; nothing between the allocation and the return
  // can throw.
  fmt->image_channel_order = ord;
  fmt->image_channel_data_type = tp;

  return fmt;
}
// Return the image formats supported by context `ctx` for the given memory
// `flags` and `image_type` (clGetSupportedImageFormats).
//
// Uses the usual two-call protocol: first ask only for the number of
// formats, then fetch that many entries. The result is produced by
// PYOPENCL_RETURN_VECTOR, which supplies the return statement.
inline
py::list get_supported_image_formats(
    context const &ctx,
    cl_mem_flags flags,
    cl_mem_object_type image_type)
{
  // First call: count only (no output array).
  cl_uint num_image_formats;
  PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, (
        ctx.data(), flags, image_type,
        0, nullptr, &num_image_formats));

  // Second call: fill an array of exactly that size.
  std::vector<cl_image_format> formats(num_image_formats);
  PYOPENCL_CALL_GUARDED(clGetSupportedImageFormats, (
        ctx.data(), flags, image_type,
        formats.size(), formats.empty( ) ? nullptr : &formats.front(), nullptr));

  PYOPENCL_RETURN_VECTOR(cl_image_format, formats);
}
// Number of channels implied by `fmt.image_channel_order`.
// Throws pyopencl::error(CL_INVALID_VALUE) for an order not listed here.
inline
cl_uint get_image_format_channel_count(cl_image_format const &fmt)
{
  switch (fmt.image_channel_order)
  {
    // one channel
    case CL_R:
    case CL_A:
    case CL_INTENSITY:
    case CL_LUMINANCE:
      return 1;

    // two channels
    case CL_RG:
    case CL_RA:
      return 2;

    // three channels
    case CL_RGB:
      return 3;

    // four channels
    case CL_RGBA:
    case CL_BGRA:
      return 4;

    default:
      throw pyopencl::error("ImageFormat.channel_dtype_size",
          CL_INVALID_VALUE,
          "unrecognized channel order");
  }
}
// Size value associated with `fmt.image_channel_data_type`, per the table
// below. Throws pyopencl::error(CL_INVALID_VALUE) for a data type not
// listed here.
inline
cl_uint get_image_format_channel_dtype_size(cl_image_format const &fmt)
{
  switch (fmt.image_channel_data_type)
  {
    case CL_SNORM_INT8:
    case CL_UNORM_INT8:
    case CL_SIGNED_INT8:
    case CL_UNSIGNED_INT8:
      return 1;

    case CL_SNORM_INT16:
    case CL_UNORM_INT16:
    case CL_UNORM_SHORT_565:
    case CL_UNORM_SHORT_555:
    case CL_SIGNED_INT16:
    case CL_UNSIGNED_INT16:
    case CL_HALF_FLOAT:
      return 2;

    case CL_UNORM_INT_101010:
    case CL_SIGNED_INT32:
    case CL_UNSIGNED_INT32:
    case CL_FLOAT:
      return 4;

    default:
      throw pyopencl::error("ImageFormat.channel_dtype_size",
          CL_INVALID_VALUE,
          "unrecognized channel data type");
  }
}
// Product of the channel count and the per-channel data type size of
// `fmt`. Propagates the errors of the two helper lookups.
inline
cl_uint get_image_format_item_size(cl_image_format const &fmt)
{
  cl_uint const channels = get_image_format_channel_count(fmt);
  cl_uint const bytes_per_channel = get_image_format_channel_dtype_size(fmt);
  return channels * bytes_per_channel;
}
// }}}
// {{{ image creation
inline
image *create_image(
context const &ctx,
cl_mem_flags flags,
cl_image_format const &fmt,
py::sequence shape,
py::sequence pitches,
py::object buffer)
{
if (shape.ptr() == Py_None)
throw pyopencl::error("Image", CL_INVALID_VALUE,
"'shape' must be given");
void *buf = 0;
std::unique_ptr<py_buffer_wrapper> retained_buf_obj;
if (buffer.ptr() != Py_None)
{
retained_buf_obj = std::unique_ptr<py_buffer_wrapper>(new py_buffer_wrapper);
int py_buf_flags = PyBUF_ANY_CONTIGUOUS;
if ((flags & CL_MEM_USE_HOST_PTR)
&& ((flags & CL_MEM_READ_WRITE)
|| (flags & CL_MEM_WRITE_ONLY)))
py_buf_flags |= PyBUF_WRITABLE;
retained_buf_obj->get(buffer.ptr(), py_buf_flags);
buf = retained_buf_obj->m_buf.buf;
len = retained_buf_obj->m_buf.len;
}
unsigned dims = py::len(shape);
cl_int status_code;
cl_mem mem;
if (dims == 2)
{
size_t width = py::cast<size_t>(shape[0]);
size_t height = py::cast<size_t>(shape[1]);
size_t pitch = 0;
if (pitches.ptr() != Py_None)
{
if (py::len(pitches) != 1)
throw pyopencl::error("Image", CL_INVALID_VALUE,