Newer
Older
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name, result);
PYOPENCL_RETURN_VECTOR(size_t, result);
}
case CL_KERNEL_LOCAL_MEM_SIZE:
#if PYOPENCL_CL_VERSION >= 0x1010
case CL_KERNEL_PRIVATE_MEM_SIZE:
#endif
PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name,
cl_ulong);
#if PYOPENCL_CL_VERSION >= 0x1010
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
PYOPENCL_GET_INTEGRAL_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name,
size_t);
#endif
default:
throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
#undef PYOPENCL_FIRST_ARG
}
}
#if PYOPENCL_CL_VERSION >= 0x1020
py::object get_arg_info(
cl_uint arg_index,
cl_kernel_arg_info param_name
) const
{
switch (param_name)
{
#define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
PYOPENCL_FIRST_ARG, param_name,
cl_kernel_arg_address_qualifier);
case CL_KERNEL_ARG_ACCESS_QUALIFIER:
PYOPENCL_GET_INTEGRAL_INFO(KernelArg,
PYOPENCL_FIRST_ARG, param_name,
cl_kernel_arg_access_qualifier);
case CL_KERNEL_ARG_TYPE_NAME:
case CL_KERNEL_ARG_NAME:
PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);
#undef PYOPENCL_FIRST_ARG
default:
throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
}
}
#endif
};
inline
py::list create_kernels_in_program(program &pgm)
{
cl_uint num_kernels;
PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
pgm.data(), 0, 0, &num_kernels));
std::vector<cl_kernel> kernels(num_kernels);
PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
pgm.data(), num_kernels,
kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels));
for (cl_kernel knl: kernels)
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
result.append(handle_from_new_ptr(new kernel(knl, true)));
return result;
}
inline
event *enqueue_nd_range_kernel(
command_queue &cq,
kernel &knl,
py::object py_global_work_size,
py::object py_local_work_size,
py::object py_global_work_offset,
py::object py_wait_for,
bool g_times_l)
{
PYOPENCL_PARSE_WAIT_FOR;
cl_uint work_dim = len(py_global_work_size);
std::vector<size_t> global_work_size;
COPY_PY_LIST(size_t, global_work_size);
size_t *local_work_size_ptr = 0;
std::vector<size_t> local_work_size;
if (py_local_work_size.ptr() != Py_None)
{
if (g_times_l)
work_dim = std::max(work_dim, unsigned(len(py_local_work_size)));
else
if (work_dim != unsigned(len(py_local_work_size)))
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global/local work sizes have differing dimensions");
COPY_PY_LIST(size_t, local_work_size);
while (local_work_size.size() < work_dim)
local_work_size.push_back(1);
while (global_work_size.size() < work_dim)
global_work_size.push_back(1);
local_work_size_ptr = local_work_size.empty( ) ? nullptr : &local_work_size.front();
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
}
if (g_times_l && local_work_size_ptr)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_size[work_axis] *= local_work_size[work_axis];
}
size_t *global_work_offset_ptr = 0;
std::vector<size_t> global_work_offset;
if (py_global_work_offset.ptr() != Py_None)
{
if (work_dim != unsigned(len(py_global_work_offset)))
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global work size and offset have differing dimensions");
COPY_PY_LIST(size_t, global_work_offset);
if (g_times_l && local_work_size_ptr)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_offset[work_axis] *= local_work_size[work_axis];
}
global_work_offset_ptr = global_work_offset.empty( ) ? nullptr : &global_work_offset.front();
}
PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, (
cq.data(),
knl.data(),
work_dim,
global_work_offset_ptr,
global_work_size.empty( ) ? nullptr : &global_work_size.front(),
local_work_size_ptr,
PYOPENCL_WAITLIST_ARGS, &evt
));
PYOPENCL_RETURN_NEW_EVENT(evt);
} );
}
// }}}
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
// {{{ gl interop
inline
bool have_gl()
{
#ifdef HAVE_GL
return true;
#else
return false;
#endif
}
#ifdef HAVE_GL
#ifdef __APPLE__
inline
cl_context_properties get_apple_cgl_share_group()
{
CGLContextObj kCGLContext = CGLGetCurrentContext();
CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
return (cl_context_properties) kCGLShareGroup;
}
#endif /* __APPLE__ */
class gl_buffer : public memory_object
{
public:
gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
{ }
};
class gl_renderbuffer : public memory_object
{
public:
gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: memory_object(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
{ }
};
class gl_texture : public image
{
public:
gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: image(mem, retain, PYOPENCL_STD_MOVE_IF_NEW_BUF_INTF(hostbuf))
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
{ }
py::object get_gl_texture_info(cl_gl_texture_info param_name)
{
switch (param_name)
{
case CL_GL_TEXTURE_TARGET:
PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLenum);
case CL_GL_MIPMAP_LEVEL:
PYOPENCL_GET_INTEGRAL_INFO(GLTexture, data(), param_name, GLint);
default:
throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE);
}
}
};
#define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \
inline \
TYPE *NAME ARGS \
{ \
cl_int status_code; \
PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \
cl_mem mem = CL_NAME CL_ARGS; \
\
if (status_code != CL_SUCCESS) \
throw pyopencl::error(#CL_NAME, status_code); \
\
try \
{ \
return new TYPE(mem, false); \
} \
catch (...) \
{ \
PYOPENCL_CALL_GUARDED(clReleaseMemObject, (mem)); \
throw; \
} \
}
PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer,
create_from_gl_buffer, clCreateFromGLBuffer,
(context &ctx, cl_mem_flags flags, GLuint bufobj),
(ctx.data(), flags, bufobj, &status_code));
PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
create_from_gl_texture_2d, clCreateFromGLTexture2D,
(context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel, GLuint texture),
(ctx.data(), flags, texture_target, miplevel, texture, &status_code));
PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
create_from_gl_texture_3d, clCreateFromGLTexture3D,
(context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel, GLuint texture),
(ctx.data(), flags, texture_target, miplevel, texture, &status_code));
PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer,
create_from_gl_renderbuffer, clCreateFromGLRenderbuffer,
(context &ctx, cl_mem_flags flags, GLuint renderbuffer),
(ctx.data(), flags, renderbuffer, &status_code));
inline
gl_texture *create_from_gl_texture(
context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel,
GLuint texture, unsigned dims)
{
if (dims == 2)
return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture);
else if (dims == 3)
return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture);
else
throw pyopencl::error("Image", CL_INVALID_VALUE,
"invalid dimension");
}
inline
py::tuple get_gl_object_info(memory_object_holder const &mem)
{
cl_gl_object_type otype;
GLuint gl_name;
PYOPENCL_CALL_GUARDED(clGetGLObjectInfo, (mem.data(), &otype, &gl_name));
return py::make_tuple(otype, gl_name);
}
#define WRAP_GL_ENQUEUE(what, What) \
inline \
event *enqueue_##what##_gl_objects( \
command_queue &cq, \
py::object py_mem_objects, \
py::object py_wait_for) \
{ \
PYOPENCL_PARSE_WAIT_FOR; \
\
std::vector<cl_mem> mem_objects; \
for (py::handle mo: py_mem_objects) \
mem_objects.push_back((mo).cast<memory_object_holder &>().data()); \
\
cl_event evt; \
PYOPENCL_CALL_GUARDED(clEnqueue##What##GLObjects, ( \
cq.data(), \
mem_objects.size(), mem_objects.empty( ) ? nullptr : &mem_objects.front(), \
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
PYOPENCL_WAITLIST_ARGS, &evt \
)); \
\
PYOPENCL_RETURN_NEW_EVENT(evt); \
}
WRAP_GL_ENQUEUE(acquire, Acquire);
WRAP_GL_ENQUEUE(release, Release);
#endif
#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
inline
py::object get_gl_context_info_khr(
py::object py_properties,
cl_gl_context_info param_name,
py::object py_platform
)
{
std::vector<cl_context_properties> props
= parse_context_properties(py_properties);
typedef CL_API_ENTRY cl_int (CL_API_CALL
*func_ptr_type)(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
func_ptr_type func_ptr;
#if PYOPENCL_CL_VERSION >= 0x1020
if (py_platform.ptr() != Py_None)
{
platform &plat = (py_platform).cast<platform &>();
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
func_ptr = (func_ptr_type) clGetExtensionFunctionAddressForPlatform(
plat.data(), "clGetGLContextInfoKHR");
}
else
{
PYOPENCL_DEPRECATED("get_gl_context_info_khr with platform=None", "2013.1", );
func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
"clGetGLContextInfoKHR");
}
#else
func_ptr = (func_ptr_type) clGetExtensionFunctionAddress(
"clGetGLContextInfoKHR");
#endif
if (!func_ptr)
throw error("Context.get_info", CL_INVALID_PLATFORM,
"clGetGLContextInfoKHR extension function not present");
cl_context_properties *props_ptr
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
switch (param_name)
{
case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR:
{
cl_device_id param_value;
PYOPENCL_CALL_GUARDED(func_ptr,
(props_ptr, param_name, sizeof(param_value), ¶m_value, 0));
return py::object(handle_from_new_ptr( \
new device(param_value, /*retain*/ true)));
}
case CL_DEVICES_FOR_GL_CONTEXT_KHR:
{
size_t size;
PYOPENCL_CALL_GUARDED(func_ptr,
(props_ptr, param_name, 0, 0, &size));
std::vector<cl_device_id> devices;
devices.resize(size / sizeof(devices.front()));
PYOPENCL_CALL_GUARDED(func_ptr,
(props_ptr, param_name, size,
devices.empty( ) ? nullptr : &devices.front(), &size));
for (cl_device_id did: devices)
result.append(handle_from_new_ptr(
new device(did)));
return result;
}
default:
throw error("get_gl_context_info_khr", CL_INVALID_VALUE);
}
}
#endif
// }}}
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
// {{{ deferred implementation bits
inline py::object create_mem_object_wrapper(cl_mem mem)
{
cl_mem_object_type mem_obj_type;
PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
(mem, CL_MEM_TYPE, sizeof(mem_obj_type), &mem_obj_type, 0));
switch (mem_obj_type)
{
case CL_MEM_OBJECT_BUFFER:
return py::object(handle_from_new_ptr(
new buffer(mem, /*retain*/ true)));
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE3D:
#if PYOPENCL_CL_VERSION >= 0x1020
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
#endif
return py::object(handle_from_new_ptr(
new image(mem, /*retain*/ true)));
default:
return py::object(handle_from_new_ptr(
new memory_object(mem, /*retain*/ true)));
}
}
inline
py::object memory_object_from_int(intptr_t cl_mem_as_int)
{
return create_mem_object_wrapper((cl_mem) cl_mem_as_int);
}
inline
py::object memory_object_holder::get_info(cl_mem_info param_name) const
{
switch (param_name)
{
case CL_MEM_TYPE:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
cl_mem_object_type);
case CL_MEM_FLAGS:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
cl_mem_flags);
case CL_MEM_SIZE:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
size_t);
case CL_MEM_HOST_PTR:
throw pyopencl::error("MemoryObject.get_info", CL_INVALID_VALUE,
"Use MemoryObject.get_host_array to get host pointer.");
case CL_MEM_MAP_COUNT:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
cl_uint);
case CL_MEM_REFERENCE_COUNT:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
cl_uint);
case CL_MEM_CONTEXT:
PYOPENCL_GET_OPAQUE_INFO(MemObject, data(), param_name,
cl_context, context);
#if PYOPENCL_CL_VERSION >= 0x1010
case CL_MEM_ASSOCIATED_MEMOBJECT:
{
cl_mem param_value;
PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, \
(data(), param_name, sizeof(param_value), ¶m_value, 0));
if (param_value == 0)
{
// no associated memory object? no problem.
}
return create_mem_object_wrapper(param_value);
}
case CL_MEM_OFFSET:
PYOPENCL_GET_INTEGRAL_INFO(MemObject, data(), param_name,
size_t);
#endif
default:
throw error("MemoryObjectHolder.get_info", CL_INVALID_VALUE);
}
}
inline
py::object get_mem_obj_host_array(
py::object mem_obj_py,
py::object shape, py::object dtype,
py::object order_py)
{
memory_object_holder const &mem_obj =
(mem_obj_py).cast<memory_object_holder const &>();
PyArray_Descr *tp_descr;
if (PyArray_DescrConverter(dtype.ptr(), &tp_descr) != NPY_SUCCEED)
throw py::error_already_set();
cl_mem_flags mem_flags;
PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
(mem_obj.data(), CL_MEM_FLAGS, sizeof(mem_flags), &mem_flags, 0));
if (!(mem_flags & CL_MEM_USE_HOST_PTR))
throw pyopencl::error("MemoryObject.get_host_array", CL_INVALID_VALUE,
"Only MemoryObject with USE_HOST_PTR "
"is supported.");
std::vector<npy_intp> dims;
try
{
dims.push_back(py::cast<npy_intp>(shape));
}
catch (py::cast_error &)
{
for (auto it: shape)
dims.push_back(it.cast<npy_intp>());
}
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
NPY_ORDER order = PyArray_CORDER;
PyArray_OrderConverter(order_py.ptr(), &order);
int ary_flags = 0;
if (order == PyArray_FORTRANORDER)
ary_flags |= NPY_FARRAY;
else if (order == PyArray_CORDER)
ary_flags |= NPY_CARRAY;
else
throw std::runtime_error("unrecognized order specifier");
void *host_ptr;
size_t mem_obj_size;
PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
(mem_obj.data(), CL_MEM_HOST_PTR, sizeof(host_ptr),
&host_ptr, 0));
PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
(mem_obj.data(), CL_MEM_SIZE, sizeof(mem_obj_size),
&mem_obj_size, 0));
py::object result = py::reinterpret_steal<py::object>(PyArray_NewFromDescr(
dims.size(), &dims.front(), /*strides*/ nullptr,
host_ptr, ary_flags, /*obj*/nullptr));
if ((size_t) PyArray_NBYTES(result.ptr()) > mem_obj_size)
throw pyopencl::error("MemoryObject.get_host_array",
CL_INVALID_VALUE,
"Resulting array is larger than memory object.");
PyArray_BASE(result.ptr()) = mem_obj_py.ptr();