Newer
Older
void compile(std::string options, py::object py_devices,
py::object py_headers)
{
PYOPENCL_PARSE_PY_DEVICES;
// {{{ pick apart py_headers
// py_headers is a list of tuples *(name, program)*
std::vector<std::string> header_names;
std::vector<cl_program> programs;
for (py::handle name_hdr_tup_py: py_headers)
py::tuple name_hdr_tup = py::reinterpret_borrow<py::tuple>(name_hdr_tup_py);
if (py::len(name_hdr_tup) != 2)
throw error("Program.compile", CL_INVALID_VALUE,
"epxected (name, header) tuple in headers list");
std::string name = (name_hdr_tup[0]).cast<std::string>();
program &prg = (name_hdr_tup[1]).cast<program &>();
header_names.push_back(name);
programs.push_back(prg.data());
}
std::vector<const char *> header_name_ptrs;
for (std::string const &name: header_names)
header_name_ptrs.push_back(name.c_str());
// }}}
PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram,
(m_program, num_devices, devices,
options.c_str(), header_names.size(),
programs.empty() ? nullptr : &programs.front(),
header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(),
#if PYOPENCL_CL_VERSION >= 0x2020
// Sets specialization constant `spec_id` on this program, taking the value's
// bytes from `py_buffer` (any object that can provide a contiguous buffer).
void set_specialization_constant(cl_uint spec_id, py::object py_buffer)
{
  py_buffer_wrapper spec_data;
  spec_data.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

  // length and base pointer of the acquired view are forwarded unchanged
  const auto &view = spec_data.m_buf;
  PYOPENCL_CALL_GUARDED(clSetProgramSpecializationConstant,
      (m_program, spec_id, view.len, view.buf));
}
#endif
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
};
// Creates a cl_program in `ctx` from the single source string `src` and
// returns a heap-allocated `program` wrapper of kind KND_SOURCE.
// Throws pyopencl::error if clCreateProgramWithSource does not report
// CL_SUCCESS.
inline
program *create_program_with_source(
    context &ctx,
    std::string const &src)
{
  // the API takes arrays of strings and lengths; exactly one entry is passed
  const char *src_ptr = src.c_str();
  size_t src_len = src.size();

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource");
  cl_program cl_prg = clCreateProgramWithSource(
      ctx.data(), 1, &src_ptr, &src_len, &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithSource", status_code);

  program *wrapped = nullptr;
  try
  {
    wrapped = new program(cl_prg, false, program::KND_SOURCE);
  }
  catch (...)
  {
    // the wrapper could not be built: release the handle before propagating
    clReleaseProgram(cl_prg);
    throw;
  }
  return wrapped;
}
// Creates a `program` (kind KND_BINARY) in `ctx` from per-device binaries.
//
// py_devices and py_binaries are parallel sequences: entry i of py_binaries
// is an object providing a contiguous buffer with the binary for entry i of
// py_devices.
//
// Throws error(CL_INVALID_VALUE) if the two sequences differ in length, and
// pyopencl::error if clCreateProgramWithBinary does not report CL_SUCCESS.
inline
program *create_program_with_binary(
    context &ctx,
    py::sequence py_devices,
    py::sequence py_binaries)
{
  std::vector<cl_device_id> devices;
  std::vector<const unsigned char *> binaries;
  std::vector<size_t> sizes;

  size_t num_devices = len(py_devices);
  if (len(py_binaries) != num_devices)
    throw error("create_program_with_binary", CL_INVALID_VALUE,
        "device and binary counts don't match");

  devices.reserve(num_devices);
  binaries.reserve(num_devices);
  sizes.reserve(num_devices);

  for (size_t i = 0; i < num_devices; ++i)
  {
    devices.push_back(
        (py_devices[i]).cast<device const &>().data());

    const void *buf;
    PYOPENCL_BUFFER_SIZE_T len;

    // NOTE(review): buf_wrapper goes out of scope at the end of each
    // iteration while the pointer taken from it is kept in `binaries`;
    // confirm that the exporting objects keep that memory valid until the
    // clCreateProgramWithBinary call below.
    py_buffer_wrapper buf_wrapper;
    buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS);

    buf = buf_wrapper.m_buf.buf;
    len = buf_wrapper.m_buf.len;

    binaries.push_back(reinterpret_cast<const unsigned char *>(buf));
    sizes.push_back(len);
  }

  // one status slot per device, filled in by clCreateProgramWithBinary
  PYOPENCL_STACK_CONTAINER(cl_int, binary_statuses, num_devices);

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary");
  cl_program result = clCreateProgramWithBinary(
      ctx.data(), num_devices,
      devices.empty( ) ? nullptr : &devices.front(),
      sizes.empty( ) ? nullptr : &sizes.front(),
      binaries.empty( ) ? nullptr : &binaries.front(),
      PYOPENCL_STACK_CONTAINER_GET_PTR(binary_statuses),
      &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithBinary", status_code);

  /*
  for (int i = 0; i < num_devices; ++i)
    printf("%d:%d\n", i, binary_statuses[i]);
  */

  try
  {
    return new program(result, false, program::KND_BINARY);
  }
  catch (...)
  {
    // wrapper allocation failed: do not leak the freshly created handle
    clReleaseProgram(result);
    throw;
  }
}
#if (PYOPENCL_CL_VERSION >= 0x1020) || \
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__))
// Creates a `program` in `ctx` from the built-in kernels named in
// `kernel_names` for the devices given by `py_devices`.
// Throws pyopencl::error if clCreateProgramWithBuiltInKernels does not
// report CL_SUCCESS.
inline
program *create_program_with_built_in_kernels(
    context &ctx,
    py::object py_devices,
    std::string const &kernel_names)
{
  // provides `num_devices` and `devices` used in the call below
  PYOPENCL_PARSE_PY_DEVICES;

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels");
  cl_program cl_prg = clCreateProgramWithBuiltInKernels(
      ctx.data(), num_devices, devices,
      kernel_names.c_str(), &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code);

  program *wrapped = nullptr;
  try
  {
    wrapped = new program(cl_prg, false);
  }
  catch (...)
  {
    // release the handle if the wrapper could not be constructed
    clReleaseProgram(cl_prg);
    throw;
  }
  return wrapped;
}
#endif
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
#if (PYOPENCL_CL_VERSION >= 0x2010)
// Creates a `program` of kind KND_IL in `ctx` from the intermediate-language
// bytes held in `src` (passed as pointer plus size).
// Throws pyopencl::error if clCreateProgramWithIL does not report CL_SUCCESS.
inline
program *create_program_with_il(
    context &ctx,
    std::string const &src)
{
  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithIL");
  cl_program cl_prg = clCreateProgramWithIL(
      ctx.data(), src.c_str(), src.size(), &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithIL", status_code);

  program *wrapped = nullptr;
  try
  {
    wrapped = new program(cl_prg, false, program::KND_IL);
  }
  catch (...)
  {
    // release the handle if the wrapper could not be constructed
    clReleaseProgram(cl_prg);
    throw;
  }
  return wrapped;
}
#endif
#if PYOPENCL_CL_VERSION >= 0x1020
// Links the programs in the iterable `py_programs` into a new `program`
// in `ctx`, using the option string `options` and the devices given by
// `py_devices`.
// Throws pyopencl::error if clLinkProgram does not report CL_SUCCESS.
inline
program *link_program(
    context &ctx,
    py::object py_programs,
    std::string const &options,
    py::object py_devices
    )
{
  // provides `num_devices` and `devices` used in the call below
  PYOPENCL_PARSE_PY_DEVICES;

  // collect the raw handles of all input programs
  std::vector<cl_program> programs;
  for (py::handle py_prg: py_programs)
  {
    program &prg = (py_prg).cast<program &>();
    programs.push_back(prg.data());
  }

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clLinkProgram");
  cl_program result = clLinkProgram(
      ctx.data(), num_devices, devices,
      options.c_str(),
      programs.size(),
      programs.empty() ? nullptr : &programs.front(),
      0, 0,  // no completion callback, no user data
      &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clLinkProgram", result, status_code);

  try
  {
    return new program(result, false);
  }
  catch (...)
  {
    // wrapper allocation failed: do not leak the freshly created handle
    clReleaseProgram(result);
    throw;
  }
}
#endif
#if PYOPENCL_CL_VERSION >= 0x1020
// Calls clUnloadPlatformCompiler for the given platform; failures surface
// through PYOPENCL_CALL_GUARDED.
inline
void unload_platform_compiler(platform &plat)
{
  const auto plat_id = plat.data();
  PYOPENCL_CALL_GUARDED(clUnloadPlatformCompiler, (plat_id));
}
#endif
// }}}
// {{{ kernel
// Small value type that records a size. kernel::set_arg_local passes this
// size as the argument size to clSetKernelArg together with a null pointer.
class local_memory
{
  public:
    local_memory(size_t size)
      : m_size(size)
    { }

    // the size given at construction
    size_t size() const
    {
      return m_size;
    }

  private:
    size_t m_size;
};
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
{
private:
cl_kernel m_kernel;
public:
// Wraps an existing cl_kernel handle. When `retain` is true, an additional
// reference is taken through clRetainKernel.
kernel(cl_kernel knl, bool retain)
  : m_kernel(knl)
{
  if (!retain)
    return;

  PYOPENCL_CALL_GUARDED(clRetainKernel, (knl));
}
// Creates the kernel called `kernel_name` from program `prg`.
// Throws pyopencl::error if clCreateKernel does not report CL_SUCCESS.
kernel(program const &prg, std::string const &kernel_name)
{
  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateKernel");

  const char *name_cstr = kernel_name.c_str();
  m_kernel = clCreateKernel(prg.data(), name_cstr, &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateKernel", status_code);
}
// Releases this wrapper's reference to the underlying kernel.
// NOTE(review): the *_CLEANUP guard is presumably the non-throwing variant
// of PYOPENCL_CALL_GUARDED -- confirm against the macro definition.
~kernel()
{
PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel));
}
// Returns the wrapped cl_kernel handle; no reference is added.
cl_kernel data() const
{
return m_kernel;
}
// NOTE(review): presumably generates the equality comparison members for
// `kernel` -- confirm against the macro definition.
PYOPENCL_EQUALITY_TESTS(kernel);
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
#if PYOPENCL_CL_VERSION >= 0x2010
// Returns a new heap-allocated `kernel` wrapping the result of
// clCloneKernel on this kernel.
// Throws pyopencl::error if clCloneKernel does not report CL_SUCCESS.
kernel *clone()
{
  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCloneKernel");
  cl_kernel cloned = clCloneKernel(m_kernel, &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCloneKernel", status_code);

  kernel *wrapped = nullptr;
  try
  {
    wrapped = new kernel(cloned, /* retain */ false);
  }
  catch (...)
  {
    // wrapper allocation failed: give the cloned handle back
    PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (cloned));
    throw;
  }
  return wrapped;
}
#endif
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
// Sets kernel argument `arg_index` to a null cl_mem handle.
void set_arg_null(cl_uint arg_index)
{
  cl_mem null_mem = nullptr;
  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, sizeof(null_mem), &null_mem));
}
// Sets kernel argument `arg_index` to the cl_mem handle held by `moh`.
void set_arg_mem(cl_uint arg_index, memory_object_holder &moh)
{
  cl_mem mem_handle = moh.data();
  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, sizeof(mem_handle), &mem_handle));
}
// Sets kernel argument `arg_index` from a local_memory descriptor: the
// descriptor's size is passed as the argument size, with a null value
// pointer.
void set_arg_local(cl_uint arg_index, local_memory const &loc)
{
  const size_t local_size = loc.size();
  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, local_size, nullptr));
}
// Sets kernel argument `arg_index` to the cl_sampler handle held by `smp`.
void set_arg_sampler(cl_uint arg_index, sampler const &smp)
{
  cl_sampler sampler_handle = smp.data();
  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, sizeof(sampler_handle), &sampler_handle));
}
// Sets kernel argument `arg_index` to the cl_command_queue handle held by
// `queue`.
void set_arg_command_queue(cl_uint arg_index, command_queue const &queue)
{
  cl_command_queue queue_handle = queue.data();
  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, sizeof(queue_handle), &queue_handle));
}
Andreas Klöckner
committed
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
// Converts the Python object `obj` to the C type selected by the first byte
// of the bytes object `py_typechar` and sets it as kernel argument
// `arg_index`. Throws error(CL_INVALID_VALUE) for an unknown type character.
void set_arg_buf_pack(cl_uint arg_index, py::handle py_typechar, py::handle obj)
{
  // Expands to one switch case: cast `obj` to TYPE and pass the resulting
  // value's bytes to clSetKernelArg.
#define PYOPENCL_KERNEL_PACK_AND_SET_ARG(TYPECH_VAL, TYPE) \
  case TYPECH_VAL: \
    { \
      TYPE packed = obj.cast<TYPE>(); \
      PYOPENCL_CALL_GUARDED(clSetKernelArg, \
          (m_kernel, arg_index, sizeof(TYPE), &packed)); \
      break; \
    }

  /* This is an internal interface that assumes it gets fed well-formed
   * data.  No meaningful error checking is being performed on
   * py_typechar, on purpose.
   */
  const char typechar = *PyBytes_AS_STRING(py_typechar.ptr());
  switch (typechar)
  {
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('c', char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('b', signed char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('B', unsigned char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('h', short)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('H', unsigned short)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('i', int)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('I', unsigned int)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('l', long)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('L', unsigned long)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('f', float)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('d', double)

    default:
      throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
          "invalid type char");
  }
#undef PYOPENCL_KERNEL_PACK_AND_SET_ARG
}
// Sets kernel argument `arg_index` from the contiguous buffer provided by
// `py_buffer`. If no buffer can be obtained, the pending Python error is
// cleared and error(CL_INVALID_VALUE) is thrown instead.
void set_arg_buf(cl_uint arg_index, py::handle py_buffer)
{
  py_buffer_wrapper view;

  try
  {
    view.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
  }
  catch (py::error_already_set &)
  {
    PyErr_Clear();
    throw error("Kernel.set_arg", CL_INVALID_VALUE,
        "invalid kernel argument");
  }

  const void *arg_data = view.m_buf.buf;
  PYOPENCL_BUFFER_SIZE_T arg_nbytes = view.m_buf.len;

  PYOPENCL_CALL_GUARDED(clSetKernelArg,
      (m_kernel, arg_index, arg_nbytes, arg_data));
}
#if PYOPENCL_CL_VERSION >= 0x2000
// Sets kernel argument `arg_index` to the pointer reported by `wrp.ptr()`
// via clSetKernelArgSVMPointer.
void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp)
{
  const auto svm_ptr = wrp.ptr();
  PYOPENCL_CALL_GUARDED(clSetKernelArgSVMPointer,
      (m_kernel, arg_index, svm_ptr));
}
#endif
// Sets kernel argument `arg_index` from an arbitrary Python object.
//
// None sets a null memory argument. Otherwise the object is tried, in this
// order, as: memory object holder, SVM argument (OpenCL >= 2.0 builds only),
// local_memory, sampler, command queue. The first cast that succeeds decides
// how the argument is set; a py::cast_error only moves on to the next
// candidate. If none matches, the object is handed to set_arg_buf, which
// treats it as a raw buffer.
void set_arg(cl_uint arg_index, py::handle arg)
{
  if (arg.ptr() == Py_None)
  {
    set_arg_null(arg_index);
    return;
  }

  try
  {
    set_arg_mem(arg_index, arg.cast<memory_object_holder &>());
    return;
  }
  catch (py::cast_error &) { }

#if PYOPENCL_CL_VERSION >= 0x2000
  try
  {
    set_arg_svm(arg_index, arg.cast<svm_arg_wrapper const &>());
    return;
  }
  catch (py::cast_error &) { }
#endif

  try
  {
    set_arg_local(arg_index, arg.cast<local_memory>());
    return;
  }
  catch (py::cast_error &) { }

  try
  {
    set_arg_sampler(arg_index, arg.cast<const sampler &>());
    return;
  }
  catch (py::cast_error &) { }

  try
  {
    set_arg_command_queue(arg_index, arg.cast<const command_queue &>());
    return;
  }
  catch (py::cast_error &) { }

  // last resort: interpret the object through its buffer interface
  set_arg_buf(arg_index, arg);
}
// Returns the kernel property selected by `param_name` as a Python object.
// Throws error(CL_INVALID_VALUE) for parameter names not handled here.
//
// NOTE(review): the cases carry no `break`; the PYOPENCL_GET_*_INFO macros
// are presumably responsible for returning from the function -- confirm
// against their definitions.
py::object get_info(cl_kernel_info param_name) const
{
  switch (param_name)
  {
    case CL_KERNEL_FUNCTION_NAME:
      PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
    case CL_KERNEL_NUM_ARGS:
    case CL_KERNEL_REFERENCE_COUNT:
      PYOPENCL_GET_TYPED_INFO(Kernel, m_kernel, param_name,
          cl_uint);
    case CL_KERNEL_CONTEXT:
      PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
          cl_context, context);
    case CL_KERNEL_PROGRAM:
      PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
          cl_program, program);
#if PYOPENCL_CL_VERSION >= 0x1020
    case CL_KERNEL_ATTRIBUTES:
      PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
#endif
    default:
      throw error("Kernel.get_info", CL_INVALID_VALUE);
  }
}
// Returns the work-group property `param_name` of this kernel on device
// `dev` as a Python object.
// Throws error(CL_INVALID_VALUE) for parameter names not handled here.
py::object get_work_group_info(
    cl_kernel_work_group_info param_name,
    device const &dev
    ) const
{
  // The info macros have a single "object" slot (see the plain m_kernel use
  // in get_info); this define passes the (kernel, device) pair through it.
#define PYOPENCL_FIRST_ARG m_kernel, dev.data()

  switch (param_name)
  {
    case CL_KERNEL_WORK_GROUP_SIZE:
      PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
          PYOPENCL_FIRST_ARG, param_name,
          size_t);

    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
      {
        std::vector<size_t> result;
        PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
            PYOPENCL_FIRST_ARG, param_name, result);

        PYOPENCL_RETURN_VECTOR(size_t, result);
      }

    case CL_KERNEL_LOCAL_MEM_SIZE:
#if PYOPENCL_CL_VERSION >= 0x1010
    case CL_KERNEL_PRIVATE_MEM_SIZE:
#endif
      PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
          PYOPENCL_FIRST_ARG, param_name,
          cl_ulong);

#if PYOPENCL_CL_VERSION >= 0x1010
    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
      PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
          PYOPENCL_FIRST_ARG, param_name,
          size_t);
#endif

    default:
      throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
  }

#undef PYOPENCL_FIRST_ARG
}
#if PYOPENCL_CL_VERSION >= 0x1020
// Returns property `param_name` of kernel argument number `arg_index` as a
// Python object.
// Throws error(CL_INVALID_VALUE) for parameter names not handled here.
py::object get_arg_info(
    cl_uint arg_index,
    cl_kernel_arg_info param_name
    ) const
{
  // The info macros have a single "object" slot; this define passes the
  // (kernel, argument index) pair through it.
#define PYOPENCL_FIRST_ARG m_kernel, arg_index

  switch (param_name)
  {
    case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
      PYOPENCL_GET_TYPED_INFO(KernelArg,
          PYOPENCL_FIRST_ARG, param_name,
          cl_kernel_arg_address_qualifier);

    case CL_KERNEL_ARG_ACCESS_QUALIFIER:
      PYOPENCL_GET_TYPED_INFO(KernelArg,
          PYOPENCL_FIRST_ARG, param_name,
          cl_kernel_arg_access_qualifier);

    // both of these are string-valued
    case CL_KERNEL_ARG_TYPE_NAME:
    case CL_KERNEL_ARG_NAME:
      PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);

    case CL_KERNEL_ARG_TYPE_QUALIFIER:
      PYOPENCL_GET_TYPED_INFO(KernelArg,
          PYOPENCL_FIRST_ARG, param_name,
          cl_kernel_arg_type_qualifier);

    default:
      throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
  }

#undef PYOPENCL_FIRST_ARG
}
#endif
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
#if PYOPENCL_CL_VERSION >= 0x2010
// Queries sub-group property `param_name` of this kernel on device `dev`.
//
// The meaning of `py_input_value` depends on `param_name`:
//  - MAX_SUB_GROUP_SIZE_FOR_NDRANGE / SUB_GROUP_COUNT_FOR_NDRANGE:
//    a sequence of sizes; the result is a single size_t.
//  - LOCAL_SIZE_FOR_SUB_GROUP_COUNT: a single size; the result is a
//    vector of size_t.
//  - MAX_NUM_SUB_GROUPS / COMPILE_NUM_SUB_GROUPS: not used; the result
//    is a single size_t.
// Throws error(CL_INVALID_VALUE) for parameter names not handled here.
py::object get_sub_group_info(
    device const &dev,
    cl_kernel_sub_group_info param_name,
    py::object py_input_value)
{
  switch (param_name)
  {
    // size_t * -> size_t
    case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
    case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
      {
        std::vector<size_t> input_value;
        // NOTE(review): presumably fills input_value from py_input_value
        // -- confirm against the COPY_PY_LIST definition.
        COPY_PY_LIST(size_t, input_value);

        size_t param_value;
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             input_value.size()*sizeof(input_value.front()),
             input_value.empty() ? nullptr : &input_value.front(),
             sizeof(param_value), &param_value, 0));

        return py::cast(param_value);
      }

    // size_t -> size_t[]
    case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT:
      {
        size_t input_value = py::cast<size_t>(py_input_value);

        std::vector<size_t> result;
        size_t size;

        // first call: ask only for the size of the result in bytes
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             sizeof(input_value), &input_value,
             0, nullptr, &size));
        result.resize(size / sizeof(result.front()));

        // second call: fetch the values themselves
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             sizeof(input_value), &input_value,
             size, result.empty() ? nullptr : &result.front(), 0));

        PYOPENCL_RETURN_VECTOR(size_t, result);
      }

    // () -> size_t
    case CL_KERNEL_MAX_NUM_SUB_GROUPS:
    case CL_KERNEL_COMPILE_NUM_SUB_GROUPS:
      {
        size_t param_value;
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             0, nullptr,
             sizeof(param_value), &param_value, 0));

        return py::cast(param_value);
      }

    default:
      throw error("Kernel.get_sub_group_info", CL_INVALID_VALUE);
  }
}
#endif
Andreas Klöckner
committed
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
// Catch clauses shared by the set_arg_multi overloads. They expect
// `arg_index` and `arg_value` to be in scope at the expansion site.
//
// Both handlers rethrow with the message prefixed by the 1-based argument
// number. For pyopencl errors, a hint is appended when the offending value
// is an instance of pyopencl.array.Array.
#define PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER \
  catch (error &err) \
  { \
    std::string msg("when processing arg#"); \
    msg += std::to_string(arg_index+1); \
    msg += " (1-based): "; \
    msg += err.what(); \
    auto array_mod = py::module::import("pyopencl.array"); \
    auto array_cls = array_mod.attr("Array"); \
    if (arg_value.ptr() && py::isinstance(arg_value, array_cls)) \
      msg.append( \
          " (perhaps you meant to pass 'array.data' instead of the array itself?)"); \
    throw error(err.routine().c_str(), err.code(), msg.c_str()); \
  } \
  catch (std::exception &err) \
  { \
    std::string msg("when processing arg#"); \
    msg += std::to_string(arg_index+1); \
    msg += " (1-based): "; \
    msg += err.what(); \
    throw std::runtime_error(msg.c_str()); \
  }
// Applies `set_arg_func(index, value)` to every (index, value) pair stored
// flat in `args_and_indices` as (i0, v0, i1, v1, ...). Errors are rethrown
// by PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER with the argument number
// added to the message.
inline
void set_arg_multi(
    std::function<void(cl_uint, py::handle)> set_arg_func,
    py::tuple args_and_indices)
{
  // declared outside the try block: the error-handler macro refers to both
  cl_uint arg_index;
  py::handle arg_value;

  auto it = args_and_indices.begin();
  auto end = args_and_indices.end();

  try
  {
    /* This is an internal interface that assumes it gets fed well-formed
     * data.  No meaningful error checking is being performed on
     * off-interval exhaustion of the iterator, on purpose.
     */
    for (; it != end; )
    {
      // special value in case integer cast fails
      arg_index = 9999 - 1;

      arg_index = py::cast<cl_uint>(*it);
      ++it;

      arg_value = *it;
      ++it;

      set_arg_func(arg_index, arg_value);
    }
  }
  PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
}
// Applies `set_arg_func(index, descr, value)` to every (index, descr, value)
// triple stored flat in `args_and_indices` as (i0, d0, v0, i1, d1, v1, ...).
// Errors are rethrown by PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER with
// the argument number added to the message.
inline
void set_arg_multi(
    std::function<void(cl_uint, py::handle, py::handle)> set_arg_func,
    py::tuple args_and_indices)
{
  // declared outside the try block: the error-handler macro refers to
  // arg_index and arg_value
  cl_uint arg_index;
  py::handle arg_descr, arg_value;

  auto it = args_and_indices.begin();
  auto end = args_and_indices.end();

  try
  {
    /* This is an internal interface that assumes it gets fed well-formed
     * data.  No meaningful error checking is being performed on
     * off-interval exhaustion of the iterator, on purpose.
     */
    for (; it != end; )
    {
      // special value in case integer cast fails
      arg_index = 9999 - 1;

      arg_index = py::cast<cl_uint>(*it);
      ++it;

      arg_descr = *it;
      ++it;

      arg_value = *it;
      ++it;

      set_arg_func(arg_index, arg_descr, arg_value);
    }
  }
  PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
}
// Creates kernel objects for all kernels in `pgm` and returns them as a
// Python list of `kernel` wrappers.
inline
py::list create_kernels_in_program(program &pgm)
{
  // first call: only ask how many kernels the program contains
  cl_uint num_kernels;
  PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
        pgm.data(), 0, 0, &num_kernels));

  // second call: actually create the kernel objects
  std::vector<cl_kernel> kernels(num_kernels);
  PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
        pgm.data(), num_kernels,
        kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels));

  py::list result;
  for (cl_kernel knl: kernels)
  {
    // clCreateKernelsInProgram already hands out one reference per kernel,
    // which the wrapper adopts. Retaining again here would leave a
    // reference that ~kernel's single release never drops.
    result.append(handle_from_new_ptr(new kernel(knl, /* retain */ false)));
  }

  return result;
}
inline
event *enqueue_nd_range_kernel(
command_queue &cq,
kernel &knl,
py::handle py_global_work_size,
py::handle py_local_work_size,
py::handle py_global_work_offset,
py::handle py_wait_for,
bool g_times_l,
bool allow_empty_ndrange)
{
PYOPENCL_PARSE_WAIT_FOR;
std::array<size_t, MAX_WS_DIM_COUNT> global_work_size;
unsigned gws_size = 0;
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_size, gws_size);
cl_uint work_dim = gws_size;
std::array<size_t, MAX_WS_DIM_COUNT> local_work_size;
unsigned lws_size = 0;
size_t *local_work_size_ptr = nullptr;
if (py_local_work_size.ptr() != Py_None)
{
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, local_work_size, lws_size);
work_dim = std::max(work_dim, lws_size);
if (work_dim != lws_size)
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global/local work sizes have differing dimensions");
while (lws_size < work_dim)
local_work_size[lws_size++] = 1;
while (gws_size < work_dim)
global_work_size[gws_size++] = 1;
local_work_size_ptr = &local_work_size.front();
if (g_times_l && lws_size)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_size[work_axis] *= local_work_size[work_axis];
}
size_t *global_work_offset_ptr = nullptr;
std::array<size_t, MAX_WS_DIM_COUNT> global_work_offset;
if (py_global_work_offset.ptr() != Py_None)
{
unsigned gwo_size = 0;
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_offset, gwo_size);
if (work_dim != gwo_size)
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global work size and offset have differing dimensions");
if (g_times_l && local_work_size_ptr)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_offset[work_axis] *= local_work_size[work_axis];
}
global_work_offset_ptr = &global_work_offset.front();
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
if (allow_empty_ndrange)
{
#if PYOPENCL_CL_VERSION >= 0x1020
bool is_empty = false;
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
if (global_work_size[work_axis] == 0)
is_empty = true;
if (local_work_size_ptr)
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
if (local_work_size_ptr[work_axis] == 0)
is_empty = true;
if (is_empty)
{
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueMarkerWithWaitList, (
cq.data(), PYOPENCL_WAITLIST_ARGS, &evt));
PYOPENCL_RETURN_NEW_EVENT(evt);
}
#else
// clEnqueueWaitForEvents + clEnqueueMarker is not equivalent
// in the case of an out-of-order queue.
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"allow_empty_ndrange requires OpenCL 1.2");
#endif
}
PYOPENCL_RETRY_RETURN_IF_MEM_ERROR( {
cl_event evt;
PYOPENCL_CALL_GUARDED(clEnqueueNDRangeKernel, (
cq.data(),
knl.data(),
work_dim,
global_work_offset_ptr,
local_work_size_ptr,
PYOPENCL_WAITLIST_ARGS, &evt
));
PYOPENCL_RETURN_NEW_EVENT(evt);
} );
}
// }}}
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
// {{{ gl interop
// Reports whether this translation unit was compiled with HAVE_GL defined.
inline
bool have_gl()
{
#ifdef HAVE_GL
  const bool gl_enabled = true;
#else
  const bool gl_enabled = false;
#endif
  return gl_enabled;
}
#ifdef HAVE_GL
#ifdef __APPLE__
// Returns the CGL share group of the current CGL context, converted to a
// cl_context_properties value.
inline
cl_context_properties get_apple_cgl_share_group()
{
  CGLShareGroupObj share_group = CGLGetShareGroup(CGLGetCurrentContext());
  return (cl_context_properties) share_group;
}
#endif /* __APPLE__ */
// memory_object subclass returned by create_from_gl_buffer. It adds no
// members of its own; the constructor forwards everything to memory_object.
class gl_buffer : public memory_object
{
public:
// mem/retain/hostbuf are passed straight through to the base class
gl_buffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: memory_object(mem, retain, std::move(hostbuf))
{ }
};
// memory_object subclass returned by create_from_gl_renderbuffer. It adds no
// members of its own; the constructor forwards everything to memory_object.
class gl_renderbuffer : public memory_object
{
public:
// mem/retain/hostbuf are passed straight through to the base class
gl_renderbuffer(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
: memory_object(mem, retain, std::move(hostbuf))
{ }
};
// image subclass returned by the create_from_gl_texture_* functions. Besides
// forwarding construction to `image`, it exposes the GL texture info query.
class gl_texture : public image
{
  public:
    // mem/retain/hostbuf are passed straight through to the base class
    gl_texture(cl_mem mem, bool retain, hostbuf_t hostbuf=hostbuf_t())
      : image(mem, retain, std::move(hostbuf))
    { }

    // Returns the GL texture property selected by `param_name`:
    // CL_GL_TEXTURE_TARGET is read as a GLenum, CL_GL_MIPMAP_LEVEL as a
    // GLint. Throws error(CL_INVALID_VALUE) for any other parameter name.
    py::object get_gl_texture_info(cl_gl_texture_info param_name)
    {
      switch (param_name)
      {
        case CL_GL_TEXTURE_TARGET:
          PYOPENCL_GET_TYPED_INFO(GLTexture, data(), param_name, GLenum);
        case CL_GL_MIPMAP_LEVEL:
          PYOPENCL_GET_TYPED_INFO(GLTexture, data(), param_name, GLint);

        default:
          throw error("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE);
      }
    }
};
// Defines an inline function `TYPE *NAME ARGS` that calls `CL_NAME CL_ARGS`
// to create a cl_mem and wraps it in a heap-allocated TYPE.
//
// CL_ARGS must pass `&status_code` to the OpenCL call: the generated
// function declares that variable and throws pyopencl::error when it is not
// CL_SUCCESS.
//
// If constructing the wrapper throws, the cl_mem is released with
// PYOPENCL_CALL_GUARDED_CLEANUP -- the same guard kernel::clone uses in its
// catch handler -- so that the original exception is the one that
// propagates.
#define PYOPENCL_WRAP_BUFFER_CREATOR(TYPE, NAME, CL_NAME, ARGS, CL_ARGS) \
  inline \
  TYPE *NAME ARGS \
  { \
    cl_int status_code; \
    PYOPENCL_PRINT_CALL_TRACE(#CL_NAME); \
    cl_mem mem = CL_NAME CL_ARGS; \
    \
    if (status_code != CL_SUCCESS) \
      throw pyopencl::error(#CL_NAME, status_code); \
    \
    try \
    { \
      return new TYPE(mem, false); \
    } \
    catch (...) \
    { \
      PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseMemObject, (mem)); \
      throw; \
    } \
  }
// Instantiations of PYOPENCL_WRAP_BUFFER_CREATOR. Each one defines an inline
// creator function; the last macro argument is the argument list handed to
// the OpenCL call and always ends in &status_code.

// gl_buffer *create_from_gl_buffer(ctx, flags, bufobj)
//   -> clCreateFromGLBuffer
PYOPENCL_WRAP_BUFFER_CREATOR(gl_buffer,
create_from_gl_buffer, clCreateFromGLBuffer,
(context &ctx, cl_mem_flags flags, GLuint bufobj),
(ctx.data(), flags, bufobj, &status_code));
// gl_texture *create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture)
//   -> clCreateFromGLTexture2D
PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
create_from_gl_texture_2d, clCreateFromGLTexture2D,
(context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel, GLuint texture),
(ctx.data(), flags, texture_target, miplevel, texture, &status_code));
// gl_texture *create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture)
//   -> clCreateFromGLTexture3D
PYOPENCL_WRAP_BUFFER_CREATOR(gl_texture,
create_from_gl_texture_3d, clCreateFromGLTexture3D,
(context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel, GLuint texture),
(ctx.data(), flags, texture_target, miplevel, texture, &status_code));
// gl_renderbuffer *create_from_gl_renderbuffer(ctx, flags, renderbuffer)
//   -> clCreateFromGLRenderbuffer
PYOPENCL_WRAP_BUFFER_CREATOR(gl_renderbuffer,
create_from_gl_renderbuffer, clCreateFromGLRenderbuffer,
(context &ctx, cl_mem_flags flags, GLuint renderbuffer),
(ctx.data(), flags, renderbuffer, &status_code));
inline
gl_texture *create_from_gl_texture(
context &ctx, cl_mem_flags flags,
GLenum texture_target, GLint miplevel,
GLuint texture, unsigned dims)
{
if (dims == 2)
return create_from_gl_texture_2d(ctx, flags, texture_target, miplevel, texture);
else if (dims == 3)
return create_from_gl_texture_3d(ctx, flags, texture_target, miplevel, texture);
else
throw pyopencl::error("Image", CL_INVALID_VALUE,
"invalid dimension");