diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 9a6d794eeaeffde8a60f7a44aa4ee3f80cca9a79..2375c6ee90b685efce1021f21dbaa80f2073540d 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -381,7 +381,9 @@ def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
                 "dest%d[i] = src%d[src_idx];" % (i, i)
                 for i in range(vec_count)))
 
-    return get_elwise_kernel(context, args, body, name="take")
+    return get_elwise_kernel(context, args, body,
+            preamble=dtype_to_c_struct(context.devices[0], dtype),
+            name="take")
 
 
 @context_dependent_memoize
@@ -419,7 +421,9 @@ def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
                 "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % ctx)
             + "\n".join(get_copy_insn(i) for i in range(vec_count)))
 
-    return get_elwise_kernel(context, args, body, name="take_put")
+    return get_elwise_kernel(context, args, body,
+            preamble=dtype_to_c_struct(context.devices[0], dtype),
+            name="take_put")
 
 
 @context_dependent_memoize
@@ -444,7 +448,9 @@ def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
             + "\n".join("dest%d[dest_idx] = src%d[i];" % (i, i)
                 for i in range(vec_count)))
 
-    return get_elwise_kernel(context, args, body, name="put")
+    return get_elwise_kernel(context, args, body,
+            preamble=dtype_to_c_struct(context.devices[0], dtype),
+            name="put")
 
 
 @context_dependent_memoize
@@ -462,6 +468,9 @@ def get_copy_kernel(context, dtype_dest, dtype_src):
                 "tp_src": dtype_to_ctype(dtype_src),
                 },
             "dest[i] = %s" % src,
+            # NOTE(review): only dtype_dest's struct is declared here --
+            # confirm dtype_src never needs its own declaration.
+            preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
             name="copy")
 
 
diff --git a/test/test_array.py b/test/test_array.py
index d1e0c082ca99c884aaa2a95800d3fc3f24e0492c..3a2e88c09908cc0061275ed25e810c7b7e117408 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -289,6 +289,36 @@ def test_custom_type_fill(ctx_factory):
 
     assert np.array_equal(np.zeros(n, dtype), z)
 
+
+def test_custom_type_take_put(ctx_factory):
+    """Index a struct-dtype device array and compare with host indexing."""
+    context = ctx_factory()
+    queue = cl.CommandQueue(context)
+
+    dtype = np.dtype([
+        ("cur_min", np.int32),
+        ("cur_max", np.int32),
+        ])
+
+    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct
+
+    name = "tp_type"
+    dtype, c_decl = match_dtype_to_c_struct(queue.device, name, dtype)
+    dtype = get_or_register_dtype(name, dtype)
+
+    n = 100
+    z = np.empty(n, dtype)
+    z["cur_min"] = np.arange(n)
+    z["cur_max"] = np.arange(n)**2
+
+    z_dev = cl.array.to_device(queue, z)
+    ind = cl.array.arange(queue, n, step=3, dtype=np.int32)
+
+    z_ind_ref = z[ind.get()]
+    z_ind = z_dev[ind]
+
+    assert np.array_equal(z_ind.get(), z_ind_ref)
+
 # }}}
 
 