Newer
Older
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
: m_program(prog), m_program_kind(progkind)
{
if (retain)
PYOPENCL_CALL_GUARDED(clRetainProgram, (prog));
}
// Releases the cl_program held by this wrapper. The release goes through the
// cleanup flavour of the guarded-call macro (defined elsewhere).
~program()
{ PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseProgram, (m_program)); }
// Accessor for the raw OpenCL program handle stored in this wrapper.
cl_program data() const { return m_program; }
// Accessor for the program kind tag that was recorded at construction.
program_kind_type kind() const { return m_program_kind; }
// Macro-generated equality support for `program` (macro defined elsewhere;
// presumably compares the wrapped handles -- TODO confirm).
PYOPENCL_EQUALITY_TESTS(program);
// Looks up one piece of program information and converts it to a Python
// object. `param_name` selects the query; names that are not handled below
// raise error("Program.get_info", CL_INVALID_VALUE).
//
// NOTE(review): PYOPENCL_GET_TYPED_INFO / _OPAQUE_INFO / _STR_INFO and
// PYOPENCL_RETURN_VECTOR are defined elsewhere and are assumed to return from
// this function, which is why those cases carry no break -- confirm against
// the macro definitions.
py::object get_info(cl_program_info param_name) const
{
  switch (param_name)
  {
    case CL_PROGRAM_REFERENCE_COUNT:
      PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name,
          cl_uint);
    case CL_PROGRAM_CONTEXT:
      PYOPENCL_GET_OPAQUE_INFO(Program, m_program, param_name,
          cl_context, context);
    case CL_PROGRAM_NUM_DEVICES:
      PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name, cl_uint);
    case CL_PROGRAM_DEVICES:
      {
        std::vector<cl_device_id> result;
        PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);

        py::list py_result;
        for (cl_device_id did: result)
          py_result.append(handle_from_new_ptr(
                new pyopencl::device(did)));
        return py_result;
      }
    case CL_PROGRAM_SOURCE:
      PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
    case CL_PROGRAM_BINARY_SIZES:
      {
        std::vector<size_t> result;
        PYOPENCL_GET_VEC_INFO(Program, m_program, param_name, result);
        PYOPENCL_RETURN_VECTOR(size_t, result);
      }
    case CL_PROGRAM_BINARIES:
      // {{{
      {
        std::vector<size_t> sizes;
        PYOPENCL_GET_VEC_INFO(Program, m_program, CL_PROGRAM_BINARY_SIZES, sizes);

        // Seed with a size_t: an int seed would make std::accumulate add up
        // in int and overflow/truncate for large binaries.
        size_t total_size = std::accumulate(
            sizes.begin(), sizes.end(), size_t(0));

        // One contiguous allocation holds every binary back to back;
        // result_ptrs[i] points at the slice reserved for binary i.
        std::unique_ptr<unsigned char []> result(
            new unsigned char[total_size]);
        std::vector<unsigned char *> result_ptrs;
        result_ptrs.reserve(sizes.size());

        unsigned char *ptr = result.get();
        for (size_t binary_size: sizes)
        {
          result_ptrs.push_back(ptr);
          ptr += binary_size;
        }

        PYOPENCL_CALL_GUARDED(clGetProgramInfo,
            (m_program, param_name, sizes.size()*sizeof(unsigned char *),
             result_ptrs.empty( ) ? nullptr : &result_ptrs.front(), 0));

        // Slice the contiguous buffer back into one bytes object per binary.
        py::list py_result;
        ptr = result.get();
        for (size_t binary_size: sizes)
        {
          py::object binary_pyobj(
              py::reinterpret_steal<py::object>(
#if PY_VERSION_HEX >= 0x03000000
              PyBytes_FromStringAndSize(
                reinterpret_cast<char *>(ptr), binary_size)
#else
              PyString_FromStringAndSize(
                reinterpret_cast<char *>(ptr), binary_size)
#endif
              ));
          py_result.append(binary_pyobj);
          ptr += binary_size;
        }
        return py_result;
      }
      // }}}
#if PYOPENCL_CL_VERSION >= 0x1020
    case CL_PROGRAM_NUM_KERNELS:
      PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name,
          size_t);
    case CL_PROGRAM_KERNEL_NAMES:
      PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
#endif
#if PYOPENCL_CL_VERSION >= 0x2010
    case CL_PROGRAM_IL:
      PYOPENCL_GET_STR_INFO(Program, m_program, param_name);
#endif
#if PYOPENCL_CL_VERSION >= 0x2020
    case CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT:
    case CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT:
      PYOPENCL_GET_TYPED_INFO(Program, m_program, param_name, cl_bool);
#endif

    default:
      throw error("Program.get_info", CL_INVALID_VALUE);
  }
}
// Looks up one piece of per-device build information for this program and
// converts it to a Python object. `dev` is the device the build is queried
// for; `param_name` selects the query. Names that are not handled below raise
// error("Program.get_build_info", CL_INVALID_VALUE).
//
// NOTE(review): the PYOPENCL_GET_*_INFO macros are defined elsewhere and are
// assumed to return from this function (no break after each case) -- confirm.
py::object get_build_info(
    device const &dev,
    cl_program_build_info param_name) const
{
  switch (param_name)
  {
    // The info macros take a single "first argument"; this define smuggles
    // both the program and the device through that slot.
#define PYOPENCL_FIRST_ARG m_program, dev.data() // hackety hack
    case CL_PROGRAM_BUILD_STATUS:
      PYOPENCL_GET_TYPED_INFO(ProgramBuild,
          PYOPENCL_FIRST_ARG, param_name,
          cl_build_status);
    case CL_PROGRAM_BUILD_OPTIONS:
    case CL_PROGRAM_BUILD_LOG:
      PYOPENCL_GET_STR_INFO(ProgramBuild,
          PYOPENCL_FIRST_ARG, param_name);
#if PYOPENCL_CL_VERSION >= 0x1020
    case CL_PROGRAM_BINARY_TYPE:
      PYOPENCL_GET_TYPED_INFO(ProgramBuild,
          PYOPENCL_FIRST_ARG, param_name,
          cl_program_binary_type);
#endif
#if PYOPENCL_CL_VERSION >= 0x2000
    case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE:
      PYOPENCL_GET_TYPED_INFO(ProgramBuild,
          PYOPENCL_FIRST_ARG, param_name,
          size_t);
#endif
#undef PYOPENCL_FIRST_ARG

    default:
      throw error("Program.get_build_info", CL_INVALID_VALUE);
  }
}
// Builds this program with the given option string by calling clBuildProgram
// without a notification callback or user data.
void build(std::string options, py::object py_devices)
{
  // Macro defined elsewhere; presumably turns `py_devices` into the
  // `num_devices` / `devices` locals used in the call below.
  PYOPENCL_PARSE_PY_DEVICES;

  PYOPENCL_CALL_GUARDED_THREADED(
      clBuildProgram,
      (m_program, num_devices, devices, options.c_str(), nullptr, nullptr));
}
#if PYOPENCL_CL_VERSION >= 0x1020
// Compiles this program with clCompileProgram.
//
// `options` is the compiler option string. `py_headers` is an iterable of
// (name, program) tuples giving the embedded headers and their include names.
// An entry that is not a 2-tuple raises error("Program.compile",
// CL_INVALID_VALUE, ...). No notification callback is installed.
void compile(std::string options, py::object py_devices,
    py::object py_headers)
{
  PYOPENCL_PARSE_PY_DEVICES;

  // {{{ pick apart py_headers

  // py_headers is a list of tuples *(name, program)*
  std::vector<std::string> header_names;
  std::vector<cl_program> programs;
  for (py::handle name_hdr_tup_py: py_headers)
  {
    py::tuple name_hdr_tup = py::reinterpret_borrow<py::tuple>(name_hdr_tup_py);
    if (py::len(name_hdr_tup) != 2)
      throw error("Program.compile", CL_INVALID_VALUE,
          "expected (name, header) tuple in headers list");
    std::string name = (name_hdr_tup[0]).cast<std::string>();
    program &prg = (name_hdr_tup[1]).cast<program &>();

    header_names.push_back(name);
    programs.push_back(prg.data());
  }

  // Collect the C-string pointers only after header_names is complete, so a
  // reallocation of that vector cannot invalidate them.
  std::vector<const char *> header_name_ptrs;
  header_name_ptrs.reserve(header_names.size());
  for (std::string const &name: header_names)
    header_name_ptrs.push_back(name.c_str());

  // }}}

  PYOPENCL_CALL_GUARDED_THREADED(clCompileProgram,
      (m_program, num_devices, devices,
       options.c_str(), header_names.size(),
       programs.empty() ? nullptr : &programs.front(),
       header_name_ptrs.empty() ? nullptr : &header_name_ptrs.front(),
       nullptr, nullptr));
}
#endif
#if PYOPENCL_CL_VERSION >= 0x2020
// Sets specialization constant `spec_id` of this program from the bytes
// exposed by `py_buffer` through the Python buffer interface.
void set_specialization_constant(cl_uint spec_id, py::object py_buffer)
{
  py_buffer_wrapper spec_view;
  spec_view.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);

  PYOPENCL_CALL_GUARDED(
      clSetProgramSpecializationConstant,
      (m_program, spec_id, spec_view.m_buf.len, spec_view.m_buf.buf));
}
#endif
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
};
// Creates a program in context `ctx` from the single source string `src`.
// Returns a newly allocated wrapper of kind KND_SOURCE. A non-success status
// from clCreateProgramWithSource is raised as pyopencl::error.
inline
program *create_program_with_source(
    context &ctx,
    std::string const &src)
{
  const char *src_text = src.c_str();
  size_t src_length = src.size();

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithSource");
  cl_program new_prg = clCreateProgramWithSource(
      ctx.data(), 1, &src_text, &src_length, &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithSource", status_code);

  // The wrapper adopts the handle without retaining it; if constructing the
  // wrapper fails, release the handle here before re-raising.
  try
  {
    return new program(new_prg, false, program::KND_SOURCE);
  }
  catch (...)
  {
    clReleaseProgram(new_prg);
    throw;
  }
}
// Creates a program in context `ctx` from one pre-built binary per device.
//
// `py_devices` and `py_binaries` must have the same length, otherwise
// error("create_program_with_binary", CL_INVALID_VALUE, ...) is raised.
// Binary i must expose a contiguous buffer and is paired with device i.
// Returns a newly allocated wrapper of kind KND_BINARY. A non-success status
// from clCreateProgramWithBinary is raised as pyopencl::error.
inline
program *create_program_with_binary(
    context &ctx,
    py::sequence py_devices,
    py::sequence py_binaries)
{
  std::vector<cl_device_id> devices;
  std::vector<const unsigned char *> binaries;
  std::vector<size_t> sizes;

  size_t num_devices = len(py_devices);
  if (len(py_binaries) != num_devices)
    throw error("create_program_with_binary", CL_INVALID_VALUE,
        "device and binary counts don't match");

  // The buffer views must outlive the clCreateProgramWithBinary call because
  // `binaries` stores raw pointers into them. Reserving up front means the
  // emplace_back below never reallocates.
  std::vector<std::unique_ptr<py_buffer_wrapper>> buf_wrappers;
  buf_wrappers.reserve(num_devices);

  for (size_t i = 0; i < num_devices; ++i)
  {
    devices.push_back(
        (py_devices[i]).cast<device const &>().data());

    buf_wrappers.emplace_back(new py_buffer_wrapper);
    py_buffer_wrapper &buf_wrapper = *buf_wrappers.back();

    buf_wrapper.get(py::object(py_binaries[i]).ptr(), PyBUF_ANY_CONTIGUOUS);

    const void *buf = buf_wrapper.m_buf.buf;
    PYOPENCL_BUFFER_SIZE_T len = buf_wrapper.m_buf.len;

    binaries.push_back(reinterpret_cast<const unsigned char *>(buf));
    sizes.push_back(len);
  }

  PYOPENCL_STACK_CONTAINER(cl_int, binary_statuses, num_devices);

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBinary");
  cl_program result = clCreateProgramWithBinary(
      ctx.data(), num_devices,
      devices.empty( ) ? nullptr : &devices.front(),
      sizes.empty( ) ? nullptr : &sizes.front(),
      binaries.empty( ) ? nullptr : &binaries.front(),
      PYOPENCL_STACK_CONTAINER_GET_PTR(binary_statuses),
      &status_code);
  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithBinary", status_code);

  /*
  for (int i = 0; i < num_devices; ++i)
    printf("%d:%d\n", i, binary_statuses[i]);
    */

  // The wrapper adopts the handle without retaining it; release it here if
  // constructing the wrapper fails.
  try
  {
    return new program(result, false, program::KND_BINARY);
  }
  catch (...)
  {
    clReleaseProgram(result);
    throw;
  }
}
#if (PYOPENCL_CL_VERSION >= 0x1020) || \
      ((PYOPENCL_CL_VERSION >= 0x1030) && defined(__APPLE__))
// Creates a program in context `ctx` from the built-in kernels named in
// `kernel_names`, for the devices given by `py_devices`.
// Returns a newly allocated wrapper. A non-success status from
// clCreateProgramWithBuiltInKernels is raised as pyopencl::error.
inline
program *create_program_with_built_in_kernels(
    context &ctx,
    py::object py_devices,
    std::string const &kernel_names)
{
  PYOPENCL_PARSE_PY_DEVICES;

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithBuiltInKernels");
  cl_program result = clCreateProgramWithBuiltInKernels(
      ctx.data(), num_devices, devices,
      kernel_names.c_str(), &status_code);
  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithBuiltInKernels", status_code);

  // The wrapper adopts the handle without retaining it; release it here if
  // constructing the wrapper fails.
  try
  {
    return new program(result, false);
  }
  catch (...)
  {
    clReleaseProgram(result);
    throw;
  }
}
#endif
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
#if (PYOPENCL_CL_VERSION >= 0x2010)
// Creates a program in context `ctx` from the intermediate-language bytes
// held in `src`. Returns a newly allocated wrapper of kind KND_IL.
// A non-success status from clCreateProgramWithIL is raised as pyopencl::error.
inline
program *create_program_with_il(
    context &ctx,
    std::string const &src)
{
  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clCreateProgramWithIL");
  cl_program il_prg = clCreateProgramWithIL(
      ctx.data(), src.c_str(), src.size(), &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateProgramWithIL", status_code);

  // The wrapper adopts the handle without retaining it; release it here if
  // constructing the wrapper fails.
  try
  {
    return new program(il_prg, false, program::KND_IL);
  }
  catch (...)
  {
    clReleaseProgram(il_prg);
    throw;
  }
}
#endif
#if PYOPENCL_CL_VERSION >= 0x1020
// Links the programs in `py_programs` into a new program in context `ctx`
// using clLinkProgram with the option string `options`. No notification
// callback is installed. Returns a newly allocated wrapper. A non-success
// status is raised as pyopencl::error.
inline
program *link_program(
    context &ctx,
    py::object py_programs,
    std::string const &options,
    py::object py_devices
    )
{
  PYOPENCL_PARSE_PY_DEVICES;

  std::vector<cl_program> programs;
  for (py::handle py_prg: py_programs)
  {
    program &prg = (py_prg).cast<program &>();
    programs.push_back(prg.data());
  }

  cl_int status_code;
  PYOPENCL_PRINT_CALL_TRACE("clLinkProgram");
  cl_program result = clLinkProgram(
      ctx.data(), num_devices, devices,
      options.c_str(),
      programs.size(),
      programs.empty() ? nullptr : &programs.front(),
      nullptr, nullptr,
      &status_code);

  // NOTE(review): this error overload also receives the (possibly partial)
  // result program -- presumably so build logs stay reachable; confirm
  // against the error class.
  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clLinkProgram", result, status_code);

  // The wrapper adopts the handle without retaining it; release it here if
  // constructing the wrapper fails.
  try
  {
    return new program(result, false);
  }
  catch (...)
  {
    clReleaseProgram(result);
    throw;
  }
}
#endif
#if PYOPENCL_CL_VERSION >= 0x1020
// Forwards to clUnloadPlatformCompiler for the given platform.
inline
void unload_platform_compiler(platform &plat)
{
  PYOPENCL_CALL_GUARDED(
      clUnloadPlatformCompiler,
      (plat.data()));
}
#endif
// }}}
// {{{ kernel
// Small value type that carries a size. kernel::set_arg_local passes that
// size to clSetKernelArg together with a null data pointer.
class local_memory
{
  public:
    // Records the requested size.
    local_memory(size_t size)
      : m_size(size)
    { }

    // Returns the size given at construction.
    size_t size() const
    {
      return m_size;
    }

  private:
    size_t m_size;
};
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
{
private:
cl_kernel m_kernel;
public:
// Wraps an existing kernel handle. When `retain` is true, clRetainKernel is
// called on it; otherwise the handle is stored as-is.
kernel(cl_kernel knl, bool retain)
  : m_kernel(knl)
{
  if (!retain)
    return;

  PYOPENCL_CALL_GUARDED(clRetainKernel, (knl));
}
// Creates the kernel called `kernel_name` from program `prg` via
// clCreateKernel. A non-success status is raised as pyopencl::error.
kernel(program const &prg, std::string const &kernel_name)
{
  PYOPENCL_PRINT_CALL_TRACE("clCreateKernel");

  cl_int status_code;
  m_kernel = clCreateKernel(
      prg.data(), kernel_name.c_str(), &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCreateKernel", status_code);
}
// Releases the cl_kernel held by this wrapper. The release goes through the
// cleanup flavour of the guarded-call macro (defined elsewhere).
~kernel()
{ PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (m_kernel)); }
// Accessor for the raw OpenCL kernel handle stored in this wrapper.
cl_kernel data() const { return m_kernel; }
// Macro-generated equality support for `kernel` (macro defined elsewhere;
// presumably compares the wrapped handles -- TODO confirm).
PYOPENCL_EQUALITY_TESTS(kernel);
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
#if PYOPENCL_CL_VERSION >= 0x2010
// Duplicates this kernel with clCloneKernel and returns a newly allocated
// wrapper for the copy. A non-success status is raised as pyopencl::error.
kernel *clone()
{
  PYOPENCL_PRINT_CALL_TRACE("clCloneKernel");

  cl_int status_code;
  cl_kernel cloned = clCloneKernel(m_kernel, &status_code);

  if (status_code != CL_SUCCESS)
    throw pyopencl::error("clCloneKernel", status_code);

  // The wrapper adopts the cloned handle without retaining it; release it
  // here if constructing the wrapper fails.
  try
  {
    return new kernel(cloned, /* retain */ false);
  }
  catch (...)
  {
    PYOPENCL_CALL_GUARDED_CLEANUP(clReleaseKernel, (cloned));
    throw;
  }
}
#endif
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
// Sets kernel argument `arg_index` to a null cl_mem handle.
void set_arg_null(cl_uint arg_index)
{
  cl_mem null_mem = nullptr;

  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, sizeof(cl_mem), &null_mem));
}
// Sets kernel argument `arg_index` to the cl_mem handle held by `moh`.
void set_arg_mem(cl_uint arg_index, memory_object_holder &moh)
{
  cl_mem mem_handle = moh.data();

  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, sizeof(cl_mem), &mem_handle));
}
// Sets kernel argument `arg_index` to a local-memory request: only the size
// from `loc` is passed, with a null data pointer.
void set_arg_local(cl_uint arg_index, local_memory const &loc)
{
  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, loc.size(), nullptr));
}
// Sets kernel argument `arg_index` to the cl_sampler handle held by `smp`.
void set_arg_sampler(cl_uint arg_index, sampler const &smp)
{
  cl_sampler sampler_handle = smp.data();

  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, sizeof(cl_sampler), &sampler_handle));
}
// Sets kernel argument `arg_index` to the cl_command_queue handle held by
// `queue`.
void set_arg_command_queue(cl_uint arg_index, command_queue const &queue)
{
  cl_command_queue queue_handle = queue.data();

  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, sizeof(cl_command_queue), &queue_handle));
}
Andreas Klöckner
committed
// Converts the Python object `obj` to the C type selected by the
// one-character code `py_typechar` and passes the resulting value to
// clSetKernelArg for argument `arg_index`.
//
// Raises error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE, ...) when the
// type code is not exactly one character or is not one of the codes handled
// below. (The codes appear to follow Python's `struct` module letters --
// TODO confirm with callers.)
void set_arg_buf_pack(cl_uint arg_index, py::handle py_typechar, py::handle obj)
{
  std::string typechar_str(py::cast<std::string>(py_typechar));
  if (typechar_str.size() != 1)
    throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
        "type char argument must have exactly one character");

  char typechar = typechar_str[0];

  // One switch case per type code: cast, set the argument, leave the switch.
#define PYOPENCL_KERNEL_PACK_AND_SET_ARG(TYPECH_VAL, TYPE) \
  case TYPECH_VAL: \
    { \
      TYPE val = py::cast<TYPE>(obj); \
      PYOPENCL_CALL_GUARDED(clSetKernelArg, (m_kernel, arg_index, sizeof(val), &val)); \
      break; \
    }

  switch (typechar)
  {
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('c', char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('b', signed char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('B', unsigned char)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('h', short)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('H', unsigned short)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('i', int)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('I', unsigned int)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('l', long)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('L', unsigned long)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('f', float)
    PYOPENCL_KERNEL_PACK_AND_SET_ARG('d', double)
    default:
      throw error("Kernel.set_arg_buf_pack", CL_INVALID_VALUE,
          "invalid type char");
  }
#undef PYOPENCL_KERNEL_PACK_AND_SET_ARG
}
// Sets kernel argument `arg_index` from the raw bytes that `py_buffer`
// exposes through the Python buffer interface. If a buffer view cannot be
// obtained, the pending Python error is cleared and
// error("Kernel.set_arg", CL_INVALID_VALUE, ...) is raised instead.
void set_arg_buf(cl_uint arg_index, py::handle py_buffer)
{
  py_buffer_wrapper arg_view;

  try
  {
    arg_view.get(py_buffer.ptr(), PyBUF_ANY_CONTIGUOUS);
  }
  catch (py::error_already_set &)
  {
    PyErr_Clear();
    throw error("Kernel.set_arg", CL_INVALID_VALUE,
        "invalid kernel argument");
  }

  const void *arg_data = arg_view.m_buf.buf;
  PYOPENCL_BUFFER_SIZE_T arg_bytes = arg_view.m_buf.len;

  PYOPENCL_CALL_GUARDED(
      clSetKernelArg,
      (m_kernel, arg_index, arg_bytes, arg_data));
}
#if PYOPENCL_CL_VERSION >= 0x2000
// Sets kernel argument `arg_index` to the pointer returned by `wrp.ptr()`,
// using clSetKernelArgSVMPointer.
void set_arg_svm(cl_uint arg_index, svm_arg_wrapper const &wrp)
{
  PYOPENCL_CALL_GUARDED(
      clSetKernelArgSVMPointer,
      (m_kernel, arg_index, wrp.ptr()));
}
#endif
// Sets kernel argument `arg_index` from an arbitrary Python object by trying
// the supported argument kinds in a fixed order and using the first one that
// `arg` converts to:
//   None -> null memory handle, memory object, SVM wrapper (CL >= 2.0),
//   local_memory, sampler, command queue, and finally any object exposing
//   the buffer interface.
// A failed conversion (py::cast_error) only means "not this kind" and moves
// on to the next attempt; errors from the final buffer attempt propagate.
void set_arg(cl_uint arg_index, py::handle arg)
{
  if (arg.ptr() == Py_None)
  {
    set_arg_null(arg_index);
    return;
  }

  try
  {
    set_arg_mem(arg_index, arg.cast<memory_object_holder &>());
    return;
  }
  catch (py::cast_error &) { }

#if PYOPENCL_CL_VERSION >= 0x2000
  try
  {
    set_arg_svm(arg_index, arg.cast<svm_arg_wrapper const &>());
    return;
  }
  catch (py::cast_error &) { }
#endif

  try
  {
    set_arg_local(arg_index, arg.cast<local_memory>());
    return;
  }
  catch (py::cast_error &) { }

  try
  {
    set_arg_sampler(arg_index, arg.cast<const sampler &>());
    return;
  }
  catch (py::cast_error &) { }

  try
  {
    set_arg_command_queue(arg_index, arg.cast<const command_queue &>());
    return;
  }
  catch (py::cast_error &) { }

  set_arg_buf(arg_index, arg);
}
// Looks up one piece of kernel information and converts it to a Python
// object. `param_name` selects the query; names that are not handled below
// raise error("Kernel.get_info", CL_INVALID_VALUE).
//
// NOTE(review): the PYOPENCL_GET_*_INFO macros are defined elsewhere and are
// assumed to return from this function (no break after each case) -- confirm.
py::object get_info(cl_kernel_info param_name) const
{
  switch (param_name)
  {
    case CL_KERNEL_FUNCTION_NAME:
      PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
    case CL_KERNEL_NUM_ARGS:
    case CL_KERNEL_REFERENCE_COUNT:
      PYOPENCL_GET_TYPED_INFO(Kernel, m_kernel, param_name,
          cl_uint);
    case CL_KERNEL_CONTEXT:
      PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
          cl_context, context);
    case CL_KERNEL_PROGRAM:
      PYOPENCL_GET_OPAQUE_INFO(Kernel, m_kernel, param_name,
          cl_program, program);
#if PYOPENCL_CL_VERSION >= 0x1020
    case CL_KERNEL_ATTRIBUTES:
      PYOPENCL_GET_STR_INFO(Kernel, m_kernel, param_name);
#endif
    default:
      throw error("Kernel.get_info", CL_INVALID_VALUE);
  }
}
// Looks up one piece of work-group information for this kernel on device
// `dev` and converts it to a Python object. Parameter names that are not
// handled below raise error("Kernel.get_work_group_info", CL_INVALID_VALUE).
//
// NOTE(review): the PYOPENCL_GET_*_INFO / PYOPENCL_RETURN_VECTOR macros are
// defined elsewhere and are assumed to return from this function, which is
// why the cases carry no break -- confirm against the macro definitions.
py::object get_work_group_info(
cl_kernel_work_group_info param_name,
device const &dev
) const
{
switch (param_name)
{
// The info macros take a single "first argument"; this define passes both
// the kernel and the device through that slot.
#define PYOPENCL_FIRST_ARG m_kernel, dev.data() // hackety hack
case CL_KERNEL_WORK_GROUP_SIZE:
PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name,
size_t);
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
{
std::vector<size_t> result;
PYOPENCL_GET_VEC_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name, result);
PYOPENCL_RETURN_VECTOR(size_t, result);
}
// Both memory-size queries are read as cl_ulong.
case CL_KERNEL_LOCAL_MEM_SIZE:
#if PYOPENCL_CL_VERSION >= 0x1010
case CL_KERNEL_PRIVATE_MEM_SIZE:
#endif
PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name,
cl_ulong);
#if PYOPENCL_CL_VERSION >= 0x1010
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
PYOPENCL_GET_TYPED_INFO(KernelWorkGroup,
PYOPENCL_FIRST_ARG, param_name,
size_t);
#endif
default:
throw error("Kernel.get_work_group_info", CL_INVALID_VALUE);
#undef PYOPENCL_FIRST_ARG
}
}
#if PYOPENCL_CL_VERSION >= 0x1020
// Looks up one piece of information about kernel argument `arg_index` and
// converts it to a Python object. Parameter names that are not handled below
// raise error("Kernel.get_arg_info", CL_INVALID_VALUE).
//
// NOTE(review): the PYOPENCL_GET_*_INFO macros are defined elsewhere and are
// assumed to return from this function (no break after each case) -- confirm.
py::object get_arg_info(
cl_uint arg_index,
cl_kernel_arg_info param_name
) const
{
switch (param_name)
{
// The info macros take a single "first argument"; this define passes both
// the kernel and the argument index through that slot.
#define PYOPENCL_FIRST_ARG m_kernel, arg_index // hackety hack
case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
PYOPENCL_GET_TYPED_INFO(KernelArg,
PYOPENCL_FIRST_ARG, param_name,
cl_kernel_arg_address_qualifier);
case CL_KERNEL_ARG_ACCESS_QUALIFIER:
PYOPENCL_GET_TYPED_INFO(KernelArg,
PYOPENCL_FIRST_ARG, param_name,
cl_kernel_arg_access_qualifier);
// Both name queries are read as strings.
case CL_KERNEL_ARG_TYPE_NAME:
case CL_KERNEL_ARG_NAME:
PYOPENCL_GET_STR_INFO(KernelArg, PYOPENCL_FIRST_ARG, param_name);
case CL_KERNEL_ARG_TYPE_QUALIFIER:
PYOPENCL_GET_TYPED_INFO(KernelArg,
PYOPENCL_FIRST_ARG, param_name,
cl_kernel_arg_type_qualifier);
#undef PYOPENCL_FIRST_ARG
default:
throw error("Kernel.get_arg_info", CL_INVALID_VALUE);
}
}
#endif
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
#if PYOPENCL_CL_VERSION >= 0x2010
// Queries clGetKernelSubGroupInfo for this kernel on device `dev`.
//
// How `py_input_value` is used depends on `param_name`:
//   * ..._FOR_NDRANGE queries: read as a list of size_t, result is a size_t;
//   * CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: read as a single size_t,
//     result is a sequence of size_t;
//   * MAX_NUM_SUB_GROUPS / COMPILE_NUM_SUB_GROUPS: ignored, result is a size_t.
// Other parameter names raise error("Kernel.get_sub_group_info",
// CL_INVALID_VALUE).
py::object get_sub_group_info(
    device const &dev,
    cl_kernel_sub_group_info param_name,
    py::object py_input_value)
{
  switch (param_name)
  {
    // size_t * -> size_t
    case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
    case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
      {
        std::vector<size_t> input_value;
        // Macro defined elsewhere; presumably fills `input_value` from
        // `py_input_value` -- TODO confirm.
        COPY_PY_LIST(size_t, input_value);

        size_t param_value;
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             input_value.size()*sizeof(input_value.front()),
             input_value.empty() ? nullptr : &input_value.front(),
             sizeof(param_value), &param_value, 0));

        return py::cast(param_value);
      }

    // size_t -> size_t[]
    case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT:
      {
        size_t input_value = py::cast<size_t>(py_input_value);

        std::vector<size_t> result;
        size_t size;
        // First call only asks for the result size in bytes.
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             sizeof(input_value), &input_value,
             0, nullptr, &size));
        result.resize(size / sizeof(result.front()));
        // Second call fetches the values into the sized vector.
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             sizeof(input_value), &input_value,
             size, result.empty() ? nullptr : &result.front(), 0));

        PYOPENCL_RETURN_VECTOR(size_t, result);
      }

    // () -> size_t
    case CL_KERNEL_MAX_NUM_SUB_GROUPS:
    case CL_KERNEL_COMPILE_NUM_SUB_GROUPS:
      {
        size_t param_value;
        PYOPENCL_CALL_GUARDED(clGetKernelSubGroupInfo,
            (m_kernel, dev.data(), param_name,
             0, nullptr,
             sizeof(param_value), &param_value, 0));

        return py::cast(param_value);
      }

    default:
      throw error("Kernel.get_sub_group_info", CL_INVALID_VALUE);
  }
}
#endif
Andreas Klöckner
committed
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
// Pair of catch clauses meant to follow a try block in a scope that has
// `arg_index` and `arg_value` defined.
//
// Both clauses prefix the original message with
// "when processing arg#<arg_index+1> (1-based): ".
//  * pyopencl `error`: rethrown as `error` with the same routine and code;
//    if `arg_value` is an instance of pyopencl.array.Array, a hint about
//    passing 'array.data' is appended.
//  * any other std::exception: rethrown as std::runtime_error.
#define PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER \
catch (error &err) \
{ \
std::string msg( \
std::string("when processing arg#") + std::to_string(arg_index+1) \
+ std::string(" (1-based): ") + std::string(err.what())); \
auto mod_cl_ary(py::module::import("pyopencl.array")); \
auto cls_array(mod_cl_ary.attr("Array")); \
if (arg_value.ptr() && py::isinstance(arg_value, cls_array)) \
msg.append( \
" (perhaps you meant to pass 'array.data' instead of the array itself?)"); \
throw error(err.routine().c_str(), err.code(), msg.c_str()); \
} \
catch (std::exception &err) \
{ \
std::string msg( \
std::string("when processing arg#") + std::to_string(arg_index+1) \
+ std::string(" (1-based): ") + std::string(err.what())); \
throw std::runtime_error(msg.c_str()); \
}
inline
void set_arg_multi(
std::function<void(cl_uint, py::handle)> set_arg_func,
py::tuple args_and_indices)
{
cl_uint arg_index;
py::handle arg_value;
auto it = args_and_indices.begin(), end = args_and_indices.end();
try
{
/* This is an internal interface that assumes it gets fed well-formed
* data. No meaningful error checking is being performed on
* off-interval exhaustion of the iterator, on purpose.
*/
while (it != end)
{
// special value in case integer cast fails
arg_index = 9999 - 1;
arg_index = py::cast<cl_uint>(*it++);
arg_value = *it++;
set_arg_func(arg_index, arg_value);
}
}
PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
}
inline
void set_arg_multi(
std::function<void(cl_uint, py::handle, py::handle)> set_arg_func,
py::tuple args_and_indices)
{
cl_uint arg_index;
py::handle arg_descr, arg_value;
auto it = args_and_indices.begin(), end = args_and_indices.end();
try
{
/* This is an internal interface that assumes it gets fed well-formed
* data. No meaningful error checking is being performed on
* off-interval exhaustion of the iterator, on purpose.
*/
while (it != end)
{
// special value in case integer cast fails
arg_index = 9999 - 1;
arg_index = py::cast<cl_uint>(*it++);
arg_descr = *it++;
arg_value = *it++;
set_arg_func(arg_index, arg_descr, arg_value);
}
}
PYOPENCL_KERNEL_SET_ARG_MULTI_ERROR_HANDLER
}
// Creates a kernel object for every kernel in `pgm` and returns them as a
// Python list of kernel wrappers.
inline
py::list create_kernels_in_program(program &pgm)
{
  // First call only asks how many kernels the program contains.
  cl_uint num_kernels;
  PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
        pgm.data(), 0, 0, &num_kernels));

  // Second call fills the sized vector with the kernel handles.
  std::vector<cl_kernel> kernels(num_kernels);
  PYOPENCL_CALL_GUARDED(clCreateKernelsInProgram, (
        pgm.data(), num_kernels,
        kernels.empty( ) ? nullptr : &kernels.front(), &num_kernels));

  py::list result;
  // NOTE(review): each wrapper is built with retain=true even though the
  // handles were just created for this caller -- confirm this does not leave
  // one extra reference per kernel.
  for (cl_kernel knl: kernels)
    result.append(handle_from_new_ptr(new kernel(knl, true)));

  return result;
}
inline
event *enqueue_nd_range_kernel(
command_queue &cq,
kernel &knl,
py::handle py_global_work_size,
py::handle py_local_work_size,
py::handle py_global_work_offset,
py::handle py_wait_for,
bool g_times_l,
bool allow_empty_ndrange)
{
PYOPENCL_PARSE_WAIT_FOR;
std::array<size_t, MAX_WS_DIM_COUNT> global_work_size;
unsigned gws_size = 0;
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_size, gws_size);
cl_uint work_dim = gws_size;
std::array<size_t, MAX_WS_DIM_COUNT> local_work_size;
unsigned lws_size = 0;
size_t *local_work_size_ptr = nullptr;
if (py_local_work_size.ptr() != Py_None)
{
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, local_work_size, lws_size);
work_dim = std::max(work_dim, lws_size);
if (work_dim != lws_size)
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global/local work sizes have differing dimensions");
while (lws_size < work_dim)
local_work_size[lws_size++] = 1;
while (gws_size < work_dim)
global_work_size[gws_size++] = 1;
local_work_size_ptr = &local_work_size.front();
if (g_times_l && lws_size)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_size[work_axis] *= local_work_size[work_axis];
}
size_t *global_work_offset_ptr = nullptr;
std::array<size_t, MAX_WS_DIM_COUNT> global_work_offset;
if (py_global_work_offset.ptr() != Py_None)
{
unsigned gwo_size = 0;
COPY_PY_ARRAY("enqueue_nd_range_kernel", size_t, global_work_offset, gwo_size);
if (work_dim != gwo_size)
throw error("enqueue_nd_range_kernel", CL_INVALID_VALUE,
"global work size and offset have differing dimensions");
if (g_times_l && local_work_size_ptr)
{
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
global_work_offset[work_axis] *= local_work_size[work_axis];
}
global_work_offset_ptr = &global_work_offset.front();
if (allow_empty_ndrange)
{
#if PYOPENCL_CL_VERSION >= 0x1020
bool is_empty = false;
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
if (global_work_size[work_axis] == 0)
is_empty = true;
if (local_work_size_ptr)
for (cl_uint work_axis = 0; work_axis < work_dim; ++work_axis)
if (local_work_size_ptr[work_axis] == 0)
is_empty = true;
if (is_empty)
{