diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 73e43d561aa682e1e807259c6974fad862c0e319..92730750e18e1490ac65a159c3bd54ee5e2a9f96 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -35,7 +35,7 @@ import numpy as np import pyopencl as cl import pyopencl.array # noqa from pyopencl.scan import ScanTemplate -from pyopencl.tools import dtype_to_ctype +from pyopencl.tools import dtype_to_ctype, get_arg_offset_adjuster_code from pytools import memoize, memoize_method, Record from mako.template import Template @@ -609,8 +609,8 @@ typedef ${index_type} index_type; #define LIST_ARG_DECL ${user_list_arg_decl} #define LIST_ARGS ${user_list_args} -#define USER_ARG_DECL ${user_arg_decl} -#define USER_ARGS ${user_args} +#define USER_ARG_DECL ${user_arg_decl_no_offset} +#define USER_ARGS ${user_args_no_offset} // }}} @@ -622,7 +622,8 @@ __kernel %if do_not_vectorize: __attribute__((reqd_work_group_size(1, 1, 1))) %endif -void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) +void ${kernel_name}( + ${kernel_list_arg_decl} ${user_arg_decl_with_offset} index_type n) { %if not do_not_vectorize: @@ -663,6 +664,7 @@ void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) %endfor %endif + ${arg_offset_adjustment} generate(${kernel_list_arg_values} USER_ARGS i); %if is_count_stage: @@ -817,6 +819,16 @@ class ListOfListsBuilder: from pyopencl.tools import parse_arg_list self.arg_decls = parse_arg_list(arg_decls) + # To match with the signature of the user-supplied generate(), arguments + # can't appear to have offsets. + arg_decls_no_offset = [] + from pyopencl.tools import VectorArg + for arg in self.arg_decls: + if isinstance(arg, VectorArg) and arg.with_offset: + arg = VectorArg(arg.dtype, arg.name) + arg_decls_no_offset.append(arg) + self.arg_decls_no_offset = arg_decls_no_offset + self.count_sharing = count_sharing self.name_prefix = name_prefix @@ -925,8 +937,10 @@ class ListOfListsBuilder: kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"), user_list_arg_decl=_get_arg_decl(user_list_args), user_list_args=_get_arg_list(user_list_args), - user_arg_decl=_get_arg_decl(self.arg_decls), - user_args=_get_arg_list(self.arg_decls), + user_arg_decl_with_offset=_get_arg_decl(self.arg_decls), + user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset), + user_args_no_offset=_get_arg_list(self.arg_decls_no_offset), + arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls), list_names_and_dtypes=self.list_names_and_dtypes, count_sharing=self.count_sharing, @@ -996,8 +1010,10 @@ class ListOfListsBuilder: kernel_list_arg_values=kernel_list_arg_values, user_list_arg_decl=_get_arg_decl(user_list_args), user_list_args=_get_arg_list(user_list_args), - user_arg_decl=_get_arg_decl(self.arg_decls), - user_args=_get_arg_list(self.arg_decls), + user_arg_decl_with_offset=_get_arg_decl(self.arg_decls), + user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset), + user_args_no_offset=_get_arg_list(self.arg_decls_no_offset), + arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls), list_names_and_dtypes=self.list_names_and_dtypes, count_sharing=self.count_sharing, @@ -1095,6 +1111,9 @@ class ListOfListsBuilder: if self.eliminate_empty_output_lists: compress_kernel = self.get_compress_kernel(index_dtype) + from pyopencl.tools import expand_runtime_arg_list + args = expand_runtime_arg_list(self.arg_decls, args) + # {{{ allocate memory for counts for name, dtype in self.list_names_and_dtypes: diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 915c50e3af0e0f8ab428c5d904318d76d485601c..6218d5716b409557341f4e8ce12105ad0dcf1a30 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -1480,15 +1480,8 @@ class GenericScanKernel(_GenericScanKernelBase): # We're done here. (But pretend to return an event.) return cl.enqueue_marker(queue, wait_for=wait_for) - data_args = [] - from pyopencl.tools import VectorArg - for arg_descr, arg_val in zip(self.parsed_args, args): - if isinstance(arg_descr, VectorArg): - data_args.append(arg_val.base_data) - if arg_descr.with_offset: - data_args.append(arg_val.offset) - else: - data_args.append(arg_val) + from pyopencl.tools import expand_runtime_arg_list + data_args = list(expand_runtime_arg_list(self.parsed_args, args)) # }}} diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 05ccc5d079cbf42c5fe415adc190c845a15bfcac..b1dcf9f23e84acce7c83a2f9491e2a9cb9188f6b 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -401,6 +401,17 @@ def get_arg_offset_adjuster_code(arg_types): return "\n".join(result) +def expand_runtime_arg_list(args, user_args): + data_args = [] + for arg_descr, arg_val in zip(args, user_args): + if isinstance(arg_descr, VectorArg): + data_args.append(arg_val.base_data) + if arg_descr.with_offset: + data_args.append(arg_val.offset) + else: + data_args.append(arg_val) + return tuple(data_args) + # }}} diff --git a/test/test_algorithm.py b/test/test_algorithm.py index d63ed7881222aa9ed3a7c4b646bd05ffc4cc6866..5c09b565bac9440a862e9f98fff1bddc06ad1d7b 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -880,6 +880,34 @@ def test_list_builder(ctx_factory): assert (inf.lists.get()[-6:] == [1, 2, 2, 3, 3, 3]).all() +def test_list_builder_with_offset(ctx_factory): + from pytest import importorskip + importorskip("mako") + + context = ctx_factory() + queue = cl.CommandQueue(context) + + from pyopencl.algorithm import ListOfListsBuilder + from pyopencl.tools import VectorArg + builder = ListOfListsBuilder(context, [("mylist", np.int32)], """//CL// + void generate(LIST_ARG_DECL USER_ARG_DECL index_type i) + { + APPEND_mylist(input_list[i]); + } + """, arg_decls=[ + VectorArg(float, "input_list", with_offset=True)]) + + n = 10000 + input_list = cl.array.zeros(queue, (n + 10,), float) + input_list[10:] = 1 + + result, evt = builder(queue, n, input_list[10:]) + + inf = result["mylist"] + assert inf.count == n + assert (inf.lists.get() == 1).all() + + def test_list_builder_with_empty_elim(ctx_factory): from pytest import importorskip importorskip("mako")