From 49ec475068a5a8bec6851b46e2dc9a095fc4ecf9 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 13 Nov 2018 19:58:04 -0600 Subject: [PATCH 1/3] ListOfListsBuilder: Support user-supplied VectorArgs with offsets. --- pyopencl/algorithm.py | 35 +++++++++++++++++++++++++++-------- pyopencl/scan.py | 13 +++---------- pyopencl/tools.py | 11 +++++++++++ test/test_algorithm.py | 27 +++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 18 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 73e43d56..92730750 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -35,7 +35,7 @@ import numpy as np import pyopencl as cl import pyopencl.array # noqa from pyopencl.scan import ScanTemplate -from pyopencl.tools import dtype_to_ctype +from pyopencl.tools import dtype_to_ctype, get_arg_offset_adjuster_code from pytools import memoize, memoize_method, Record from mako.template import Template @@ -609,8 +609,8 @@ typedef ${index_type} index_type; #define LIST_ARG_DECL ${user_list_arg_decl} #define LIST_ARGS ${user_list_args} -#define USER_ARG_DECL ${user_arg_decl} -#define USER_ARGS ${user_args} +#define USER_ARG_DECL ${user_arg_decl_no_offset} +#define USER_ARGS ${user_args_no_offset} // }}} @@ -622,7 +622,8 @@ __kernel %if do_not_vectorize: __attribute__((reqd_work_group_size(1, 1, 1))) %endif -void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) +void ${kernel_name}( + ${kernel_list_arg_decl} ${user_arg_decl_with_offset} index_type n) { %if not do_not_vectorize: @@ -663,6 +664,7 @@ void ${kernel_name}(${kernel_list_arg_decl} USER_ARG_DECL index_type n) %endfor %endif + ${arg_offset_adjustment} generate(${kernel_list_arg_values} USER_ARGS i); %if is_count_stage: @@ -817,6 +819,16 @@ class ListOfListsBuilder: from pyopencl.tools import parse_arg_list self.arg_decls = parse_arg_list(arg_decls) + # To match with the signature of the user-supplied generate(), arguments + # can't appear to have offsets. + arg_decls_no_offset = [] + from pyopencl.tools import VectorArg + for arg in self.arg_decls: + if isinstance(arg, VectorArg) and arg.with_offset: + arg = VectorArg(arg.dtype, arg.name) + arg_decls_no_offset.append(arg) + self.arg_decls_no_offset = arg_decls_no_offset + self.count_sharing = count_sharing self.name_prefix = name_prefix @@ -925,8 +937,10 @@ class ListOfListsBuilder: kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"), user_list_arg_decl=_get_arg_decl(user_list_args), user_list_args=_get_arg_list(user_list_args), - user_arg_decl=_get_arg_decl(self.arg_decls), - user_args=_get_arg_list(self.arg_decls), + user_arg_decl_with_offset=_get_arg_decl(self.arg_decls), + user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset), + user_args_no_offset=_get_arg_list(self.arg_decls_no_offset), + arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls), list_names_and_dtypes=self.list_names_and_dtypes, count_sharing=self.count_sharing, @@ -996,8 +1010,10 @@ class ListOfListsBuilder: kernel_list_arg_values=kernel_list_arg_values, user_list_arg_decl=_get_arg_decl(user_list_args), user_list_args=_get_arg_list(user_list_args), - user_arg_decl=_get_arg_decl(self.arg_decls), - user_args=_get_arg_list(self.arg_decls), + user_arg_decl_with_offset=_get_arg_decl(self.arg_decls), + user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset), + user_args_no_offset=_get_arg_list(self.arg_decls_no_offset), + arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls), list_names_and_dtypes=self.list_names_and_dtypes, count_sharing=self.count_sharing, @@ -1095,6 +1111,9 @@ class ListOfListsBuilder: if self.eliminate_empty_output_lists: compress_kernel = self.get_compress_kernel(index_dtype) + from pyopencl.tools import expand_runtime_arg_list + args = expand_runtime_arg_list(self.arg_decls, args) + # {{{ allocate memory for counts for name, dtype in self.list_names_and_dtypes: diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 915c50e3..6291de9b 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -1480,16 +1480,6 @@ class GenericScanKernel(_GenericScanKernelBase): # We're done here. (But pretend to return an event.) return cl.enqueue_marker(queue, wait_for=wait_for) - data_args = [] - from pyopencl.tools import VectorArg - for arg_descr, arg_val in zip(self.parsed_args, args): - if isinstance(arg_descr, VectorArg): - data_args.append(arg_val.base_data) - if arg_descr.with_offset: - data_args.append(arg_val.offset) - else: - data_args.append(arg_val) - # }}} l1_info = self.first_level_scan_info @@ -1520,6 +1510,9 @@ class GenericScanKernel(_GenericScanKernelBase): # }}} + from pyopencl.tools import expand_runtime_arg_list + data_args = list(expand_runtime_arg_list(self.parsed_args, args)) + # {{{ first level scan of interval (one interval per block) scan1_args = data_args + [ diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 05ccc5d0..b1dcf9f2 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -401,6 +401,17 @@ def get_arg_offset_adjuster_code(arg_types): return "\n".join(result) +def expand_runtime_arg_list(args, user_args): + data_args = [] + for arg_descr, arg_val in zip(args, user_args): + if isinstance(arg_descr, VectorArg): + data_args.append(arg_val.base_data) + if arg_descr.with_offset: + data_args.append(arg_val.offset) + else: + data_args.append(arg_val) + return tuple(data_args) + # }}} diff --git a/test/test_algorithm.py b/test/test_algorithm.py index d63ed788..a4eb7b4b 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -880,6 +880,33 @@ def test_list_builder(ctx_factory): assert (inf.lists.get()[-6:] == [1, 2, 2, 3, 3, 3]).all() +def test_list_builder_with_offset(ctx_factory): + from pytest import importorskip + importorskip("mako") + + context = ctx_factory() + queue = cl.CommandQueue(context) + + from pyopencl.algorithm import ListOfListsBuilder + from pyopencl.tools import VectorArg + builder = ListOfListsBuilder(context, [("mylist", np.int32)], """//CL// + void generate(LIST_ARG_DECL USER_ARG_DECL index_type i) + { + APPEND_mylist(input_list[i]); + } + """, arg_decls=[ + VectorArg(float, "input_list", with_offset=True)]) + + input_list = cl.array.zeros(queue, (20,), float) + input_list[10:] = 1 + + result, evt = builder(queue, 10, input_list[10:]) + + inf = result["mylist"] + assert inf.count == 10 + assert (inf.lists.get() == 1).all() + + def test_list_builder_with_empty_elim(ctx_factory): from pytest import importorskip importorskip("mako") -- GitLab From 60f6ff470373710d44fedfc42a0418e076d51c5e Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 13 Nov 2018 21:01:30 -0500 Subject: [PATCH 2/3] Fix location of argument processing code in scan.py --- pyopencl/scan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 6291de9b..6218d571 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -1480,6 +1480,9 @@ class GenericScanKernel(_GenericScanKernelBase): # We're done here. (But pretend to return an event.) return cl.enqueue_marker(queue, wait_for=wait_for) + from pyopencl.tools import expand_runtime_arg_list + data_args = list(expand_runtime_arg_list(self.parsed_args, args)) + # }}} l1_info = self.first_level_scan_info @@ -1510,9 +1513,6 @@ class GenericScanKernel(_GenericScanKernelBase): # }}} - from pyopencl.tools import expand_runtime_arg_list - data_args = list(expand_runtime_arg_list(self.parsed_args, args)) - # {{{ first level scan of interval (one interval per block) scan1_args = data_args + [ -- GitLab From 66f6e1508156f6bcfb923c7e9a1989d5fc0a28bc Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Tue, 13 Nov 2018 20:13:00 -0600 Subject: [PATCH 3/3] Set test case size to 10000. --- test/test_algorithm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_algorithm.py b/test/test_algorithm.py index a4eb7b4b..5c09b565 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -897,13 +897,14 @@ def test_list_builder_with_offset(ctx_factory): """, arg_decls=[ VectorArg(float, "input_list", with_offset=True)]) - input_list = cl.array.zeros(queue, (20,), float) + n = 10000 + input_list = cl.array.zeros(queue, (n + 10,), float) input_list[10:] = 1 - result, evt = builder(queue, 10, input_list[10:]) + result, evt = builder(queue, n, input_list[10:]) inf = result["mylist"] - assert inf.count == 10 + assert inf.count == n assert (inf.lists.get() == 1).all() -- GitLab