From 5c89b5aec7c8ab2c286e2d0afcb1f69567915094 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 20 Mar 2011 05:52:46 -0400 Subject: [PATCH] Finish up, document parallel scan. --- doc/source/array.rst | 58 ++++++++++++++++++++++++++++++++++------- doc/source/misc.rst | 3 ++- doc/source/runtime.rst | 6 ++++- pyopencl/__init__.py | 5 +++- pyopencl/_cluda.py | 26 ++++++++++++++++++ pyopencl/elementwise.py | 2 +- pyopencl/reduction.py | 4 +-- pyopencl/scan.py | 8 +++++- 8 files changed, 96 insertions(+), 16 deletions(-) create mode 100644 pyopencl/_cluda.py diff --git a/doc/source/array.rst b/doc/source/array.rst index c6586eb0..eb5b69a4 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -421,7 +421,7 @@ Custom Reductions .. module:: pyopencl.reduction -.. class:: ReductionKernel(ctx, dtype_out, neutral, reduce_expr, map_expr=None, arguments=None, name="reduce_kernel", options="", preamble="") +.. class:: ReductionKernel(ctx, dtype_out, neutral, reduce_expr, map_expr=None, arguments=None, name="reduce_kernel", options=[], preamble="") Generate a kernel that takes a number of scalar or vector *arguments* (at least one vector argument), performs the *map_expr* on each entry of @@ -437,15 +437,15 @@ Custom Reductions assumed. *dtype_out* specifies the :class:`numpy.dtype` in which the reduction is - performed and in which the result is returned. *neutral* is - specified as float or integer formatted as string. *reduce_expr* and - *map_expr* are specified as string formatted operations and *arguments* - is specified as a string formatted as a C argument list. *name* specifies - the name as which the kernel is compiled. *options* are passed - unmodified to :meth:`pyopencl.Program.build`. *preamble* is specified - as a string of code. + performed and in which the result is returned. *neutral* is specified as + float or integer formatted as string. *reduce_expr* and *map_expr* are + specified as string formatted operations and *arguments* is specified as a + string formatted as a C argument list. *name* specifies the name as which + the kernel is compiled. *options* are passed unmodified to + :meth:`pyopencl.Program.build`. *preamble* specifies a string of code that + is inserted before the actual kernels. - .. method __call__(*args, queue=None) + .. method:: __call__(*args, queue=None) .. versionadded: 2011.1 @@ -460,6 +460,46 @@ Here's a usage example:: my_dot_prod = krnl(a, b).get() +Parallel Scan / Prefix Sum +-------------------------- + +.. module:: pyopencl.scan + +.. class:: ExclusiveScanKernel(ctx, dtype, scan_expr, neutral=None, name_prefix="scan", options=[], preamble="", devices=None) + + Generates a kernel that can compute a `prefix sum `_ + using any associative operation given as *scan_expr*. + *scan_expr* uses the formal values "a" and "b" to indicate two operands of + an associative binary operation. *neutral* is the neutral element + of *scan_expr*, obeying *scan_expr(a, neutral) == a*. + + *dtype* specifies the type of the arrays being operated on. + *name_prefix* is used for kernel names to ensure recognizability + in profiles and logs. *options* is a list of compiler options to use + when building. *preamble* specifies a string of code that is + inserted before the actual kernels. *devices* may be used to restrict + the set of devices on which the kernel is meant to run. (defaults + to all devices in the context *ctx*. + + .. method:: __call__(self, input_ary, output_ary=None, allocator=None, queue=None) + +.. class:: InclusiveScanKernel(dtype, scan_expr, neutral=None, name_prefix="scan", options=[], preamble="", devices=None) + + Works like :class:`ExclusiveScanKernel`. Unlike the exclusive case, + *neutral* is not required. + +Here's a usage example:: + + knl = InclusiveScanKernel(context, np.int32, "a+b") + + n = 2**20-2**18+5 + host_data = np.random.randint(0, 10, n).astype(np.int32) + dev_data = cl_array.to_device(queue, host_data) + + knl(dev_data) + assert (dev_data.get() == np.cumsum(host_data, axis=0)).all() + + Fast Fourier Transforms ----------------------- diff --git a/doc/source/misc.rst b/doc/source/misc.rst index e64cdf78..7ae20c5e 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -86,6 +86,7 @@ Version 2011.1 :func:`pyopencl.enqueue_map_image`. * Add :mod:`pyopencl.reduction`. * Add :ref:`reductions`. +* Add :mod:`pyopencl.scan`. * Add :meth:`pyopencl.MemoryObject.get_host_array`. * Deprecate context arguments of :func:`pyopencl.array.to_device`, @@ -104,7 +105,7 @@ Version 0.92 extension, leading to working GL interoperability. * Add :meth:`pyopencl.Kernel.set_args`. * The call signature of :meth:`pyopencl.Kernel.__call__` changed to - emphasize the importance of *loccal_size*. + emphasize the importance of *local_size*. * Add :meth:`pyopencl.Kernel.set_scalar_arg_dtypes`. * Add support for the `cl_nv_device_attribute_query `_ diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst index 35774022..aab26ef2 100644 --- a/doc/source/runtime.rst +++ b/doc/source/runtime.rst @@ -571,11 +571,15 @@ Programs and Kernels See :class:`program_build_info` for values of *param*. - .. method:: build(options="", devices=None) + .. method:: build(options=[], devices=None) *options* is a string of compiler flags. Returns *self*. + .. versionchanged:: 2011.1 + + *options* may now also be a :class:`list` of :class:`str`. + .. attribute:: kernel_name :class:`Kernel` objects can be produced from a built diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 83f9d3a7..feab7347 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -136,7 +136,10 @@ def _add_functionality(): else: return self.get_info(pi_attr) - def program_build(self, options="", devices=None): + def program_build(self, options=[], devices=None): + if isinstance(options, list): + options = " ".join(options) + try: self._build(options=options, devices=devices) except Exception, e: diff --git a/pyopencl/_cluda.py b/pyopencl/_cluda.py new file mode 100644 index 00000000..957a8348 --- /dev/null +++ b/pyopencl/_cluda.py @@ -0,0 +1,26 @@ +CLUDA_PREAMBLE = """ +#define local_barrier() barrier(CLK_LOCAL_MEM_FENCE); + +#define WITHIN_KERNEL /* empty */ +#define KERNEL __kernel +#define GLOBAL_MEM __global +#define LOCAL_MEM __local +#define LOCAL_MEM_ARG __local +#define REQD_WG_SIZE(X,Y,Z) __attribute__((reqd_work_group_size(X, Y, Z))) + +#define LID_0 get_local_id(0) +#define LID_1 get_local_id(1) +#define LID_2 get_local_id(2) + +#define GID_0 get_group_id(0) +#define GID_1 get_group_id(1) +#define GID_2 get_group_id(2) + +% if double_support: + #pragma OPENCL EXTENSION cl_khr_fp64: enable +% endif +""" + + + + diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index f3994ed6..6bcdc004 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -70,7 +70,7 @@ def get_elwise_program(context, arguments, operation, "after_loop": after_loop, }) - return Program(context, source).build(options=" ".join(options)) + return Program(context, source).build(options) diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index 605008d4..ebde742d 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -211,7 +211,7 @@ def get_reduction_kernel( ctx, out_type, out_type_size, neutral, reduce_expr, map_expr=None, arguments=None, name="reduce_kernel", preamble="", - device=None, options="", max_group_size=None): + device=None, options=[], max_group_size=None): if map_expr is None: map_expr = "in[i]" @@ -248,7 +248,7 @@ def get_reduction_kernel( class ReductionKernel: def __init__(self, ctx, dtype_out, neutral, reduce_expr, map_expr=None, arguments=None, - name="reduce_kernel", options="", preamble=""): + name="reduce_kernel", options=[], preamble=""): dtype_out = self.dtype_out = np.dtype(dtype_out) diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 1701e168..02679a31 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -393,7 +393,10 @@ if _CL_MODE: class _ScanKernelBase(object): def __init__(self, ctx, dtype, scan_expr, neutral=None, - name_prefix="scan", options="", preamble="", devices=None): + name_prefix="scan", options=[], preamble="", devices=None): + + if isinstance(self, ExclusiveScanKernel) and neutral is None: + raise ValueError("neutral element is required for exclusive scan") self.context = ctx dtype = self.dtype = np.dtype(dtype) @@ -512,6 +515,9 @@ else: scan_expr, neutral=None, name_prefix="scan", options=[], preamble="", devices=None): + if isinstance(self, ExclusiveScanKernel) and neutral is None: + raise ValueError("neutral element is required for exclusive scan") + dtype = self.dtype = np.dtype(dtype) self.neutral = neutral -- GitLab