diff --git a/doc/misc.rst b/doc/misc.rst index b4e30db755258400525464fdfd4b88d2e082582d..71aea7922305b5fcfaa91850915ffdfd3cb62eca 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -209,7 +209,7 @@ other software to be turned into the corresponding :mod:`pyopencl` objects. User-visible Changes ==================== -Version 2017.2 +Version 2018.2 -------------- .. note:: @@ -217,6 +217,10 @@ Version 2017.2 This version is currently under development. You can get snapshots from PyOpenCL's `git repository `_ +* Use pybind11. +* Many bug fixes. +* Support arrays with offsets in scan kernels. + Version 2018.1 -------------- diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 6a59808137d1ac8ed99b936db1c91c1a0e7d11e3..ac8052e87d3bebd01b3117ae1d8251d0083241aa 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -35,7 +35,8 @@ from pyopencl.tools import (dtype_to_ctype, bitlog2, KernelTemplateBase, _process_code_for_macro, get_arg_list_scalar_arg_dtypes, context_dependent_memoize, - _NumpyTypesKeyBuilder) + _NumpyTypesKeyBuilder, + get_arg_offset_adjuster_code) import pyopencl._mymako as mako from pyopencl._cluda import CLUDA_PREAMBLE @@ -148,6 +149,8 @@ void ${kernel_name}( %endif ) { + ${arg_offset_adjustment} + // index K in first dimension used for carry storage %if use_bank_conflict_avoidance: // Avoid bank conflicts by adding a single 32-bit value to the size of @@ -618,6 +621,8 @@ void ${name_prefix}_final_update( %endif ) { + ${arg_offset_adjustment} + %if use_lookbehind_update: LOCAL_MEM scan_type ldata[WG_SIZE]; %endif @@ -998,7 +1003,7 @@ class _GenericScanKernelBase(object): resulting in a C `bool` value that determines whether a new scan segments starts at index *i*. If given, makes the scan a segmented scan. Has access to the current index `i`, the result - of *input_expr* as a, and in addition may use *arguments* and + of *input_expr* as `a`, and in addition may use *arguments* and *input_fetch_expr* variables just like *input_expr*. If it returns true, then previous sums will not spill over into the @@ -1346,6 +1351,7 @@ class GenericScanKernel(_GenericScanKernelBase): final_update_src = str(final_update_tpl.render( wg_size=update_wg_size, output_statement=self.output_statement, + arg_offset_adjustment=get_arg_offset_adjuster_code(self.parsed_args), argument_signature=", ".join( arg.declarator() for arg in self.parsed_args), is_segment_start_expr=self.is_segment_start_expr, @@ -1421,6 +1427,7 @@ class GenericScanKernel(_GenericScanKernelBase): wg_size=wg_size, input_expr=input_expr, k_group_size=k_group_size, + arg_offset_adjustment=get_arg_offset_adjuster_code(arguments), argument_signature=", ".join(arg.declarator() for arg in arguments), is_segment_start_expr=is_segment_start_expr, input_fetch_exprs=input_fetch_exprs, @@ -1475,7 +1482,9 @@ class GenericScanKernel(_GenericScanKernelBase): from pyopencl.tools import VectorArg for arg_descr, arg_val in zip(self.parsed_args, args): if isinstance(arg_descr, VectorArg): - data_args.append(arg_val.data) + data_args.append(arg_val.base_data) + if arg_descr.with_offset: + data_args.append(arg_val.offset) else: data_args.append(arg_val) @@ -1583,6 +1592,8 @@ void ${name_prefix}_debug_scan( scan_type current = ${neutral}; scan_type prev; + ${arg_offset_adjustment} + for (index_type i = 0; i < N; ++i) { %for name, arg_name, ife_offset in input_fetch_exprs: @@ -1636,6 +1647,7 @@ class GenericDebugScanKernel(_GenericScanKernelBase): scan_tpl = _make_template(DEBUG_SCAN_TEMPLATE) scan_src = str(scan_tpl.render( output_statement=self.output_statement, + arg_offset_adjustment=get_arg_offset_adjuster_code(self.parsed_args), argument_signature=", ".join( arg.declarator() for arg in self.parsed_args), is_segment_start_expr=self.is_segment_start_expr, @@ -1680,7 +1692,9 @@ class GenericDebugScanKernel(_GenericScanKernelBase): from pyopencl.tools import VectorArg for arg_descr, arg_val in zip(self.parsed_args, args): if isinstance(arg_descr, VectorArg): - data_args.append(arg_val.data) + data_args.append(arg_val.base_data) + if arg_descr.with_offset: + data_args.append(arg_val.offset) else: data_args.append(arg_val) diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 5264767c4094806fe43fdfea6056237aed20ade4..d63ed7881222aa9ed3a7c4b646bd05ffc4cc6866 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -569,6 +569,38 @@ def test_scan(ctx_factory, dtype, scan_cls): collect() +@pytest.mark.parametrize("scan_cls", (GenericScanKernel, GenericDebugScanKernel)) +def test_scan_with_vectorargs_with_offsets(ctx_factory, scan_cls): + context = ctx_factory() + queue = cl.CommandQueue(context) + + from pyopencl.tools import VectorArg + + knl = scan_cls( + context, float, + arguments=[ + VectorArg(float, "input", with_offset=True), + VectorArg(int, "segment", with_offset=True), + ], + input_expr="input[i]", + is_segment_start_expr="segment[i]", + scan_expr="a+b", neutral="0", + output_statement=""" + input[i] = item; + """) + + n = 20 + + host_data = np.random.randint(0, 10, n).astype(float) + dev_data = cl.array.to_device(queue, host_data) + segment_data = np.zeros(n, dtype=int) + dev_segment_data = cl.array.to_device(queue, segment_data) + + knl(dev_data, dev_segment_data) + + assert (dev_data.get() == np.cumsum(host_data)).all() + + def test_copy_if(ctx_factory): from pytest import importorskip importorskip("mako") @@ -654,7 +686,6 @@ def test_index_preservation(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) - from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel classes = [GenericScanKernel] dev = context.devices[0]