Skip to content
Snippets Groups Projects
Commit f95a6b87 authored by gw0 [http://gw.tnode.com/]'s avatar gw0 [http://gw.tnode.com/]
Browse files

Add `out` parameter to ReductionKernel call.

parent d3f31844
No related branches found
No related tags found
No related merge requests found
......@@ -73,9 +73,8 @@ Sums and counts ("reduce")
Vectors in *map_expr* should be indexed by the variable *i*. *reduce_expr*
uses the formal values "a" and "b" to indicate two operands of a binary
reduction operation. If you do not specify a *map_expr*, "in[i]" -- and
therefore the presence of only one input argument -- is automatically
assumed.
reduction operation. If you do not specify a *map_expr*, ``in[i]`` is
automatically assumed and treated as the only input argument.
*dtype_out* specifies the :class:`numpy.dtype` in which the reduction is
performed and in which the result is returned. *neutral* is specified as
......@@ -86,10 +85,14 @@ Sums and counts ("reduce")
:meth:`pyopencl.Program.build`. *preamble* specifies a string of code that
is inserted before the actual kernels.
.. method:: __call__(*args, queue=None, wait_for=None, return_event=False)
.. method:: __call__(*args, queue=None, wait_for=None, return_event=False, out=None)
|explain-waitfor|
With *out*, the single-entry :class:`pyopencl.array.Array` that receives
the result can be specified. Because array offsets are supported, the
result can be stored anywhere (e.g. ``out=a[3]``).
:return: the resulting scalar as a single-entry :class:`pyopencl.array.Array`
if *return_event* is *False*, otherwise a tuple ``(scalar_array, event)``.
......
......@@ -58,10 +58,11 @@ KERNEL = """//CL//
typedef ${out_type} out_type;
__kernel void ${name}(
__global out_type *out, ${arguments},
__global out_type *out__base, long out__offset, ${arguments},
unsigned int seq_count, unsigned int n)
{
${arg_prep}
__global out_type *out = (__global out_type *) ((__global char *) out__base + out__offset);
${arg_prep}
__local out_type ldata[GROUP_SIZE];
......@@ -267,7 +268,7 @@ def get_reduction_kernel(stage,
inf.arg_types = arguments
inf.kernel.set_scalar_arg_dtypes(
[None]
[None, np.int64]
+ get_arg_list_scalar_arg_dtypes(inf.arg_types)
+ [np.uint32]*2)
......@@ -332,6 +333,7 @@ class ReductionKernel:
queue = kwargs.pop("queue", None)
wait_for = kwargs.pop("wait_for", None)
return_event = kwargs.pop("return_event", False)
out = kwargs.pop("out", None)
if kwargs:
raise TypeError("invalid keyword argument to reduction kernel")
......@@ -373,7 +375,9 @@ class ReductionKernel:
macrogroup_size = group_count*stage_inf.group_size
seq_count = (sz + macrogroup_size - 1) // macrogroup_size
if group_count == 1:
if group_count == 1 and out is not None:
result = out
elif group_count == 1:
result = empty(use_queue,
(), self.dtype_out,
allocator=repr_vec.allocator)
......@@ -386,7 +390,7 @@ class ReductionKernel:
use_queue,
(group_count*stage_inf.group_size,),
(stage_inf.group_size,),
*([result.data]+invocation_args+[seq_count, sz]),
*([result.base_data, result.offset] + invocation_args + [seq_count, sz]),
**dict(wait_for=wait_for))
wait_for = [last_evt]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment