diff --git a/doc/source/array.rst b/doc/source/array.rst index 2d18141b48479e126739ba0cc5cc1935bbe9c3fa..6c3272f0454a8847fc338c464cdfffa6fe387ead 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -108,247 +108,75 @@ Under the hood, the complex types are simply `float2` and `double2`. The :class:`Array` Class ------------------------ -.. class:: Array(cqa, shape, dtype, order="C", *, allocator=None, base=None, data=None) - - A :class:`numpy.ndarray` work-alike that stores its data and performs its - computations on the compute device. *shape* and *dtype* work exactly as in - :mod:`numpy`. Arithmetic methods in :class:`Array` support the - broadcasting of scalars. (e.g. `array+5`) - - *cqa* must be a :class:`pyopencl.CommandQueue`. *cqa* - specifies the queue in which the array carries out its - computations by default. *cqa* will at some point be renamed *queue*, - so it should be considered 'positional-only'. - - *allocator* may be `None` or a callable that, upon being called with an - argument of the number of bytes to be allocated, returns an - :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool` - instance is one useful example of an object to pass here.) - - .. versionchanged:: 2011.1 - Renamed *context* to *cqa*, made it general-purpose. - - All arguments beyond *order* should be considered keyword-only. - - .. attribute :: data - - The :class:`pyopencl.MemoryObject` instance created for the memory that backs - this :class:`Array`. - - .. attribute :: shape - - The tuple of lengths of each dimension in the array. - - .. attribute :: dtype - - The :class:`numpy.dtype` of the items in the GPU array. - - .. attribute :: size - - The number of meaningful entries in the array. Can also be computed by - multiplying up the numbers in :attr:`shape`. - - .. attribute :: nbytes - - The size of the entire array in bytes. Computed as :attr:`size` times - ``dtype.itemsize``. - - .. attribute :: strides - - Tuple of bytes to step in each dimension when traversing an array. - - .. attribute :: flags - - Return an object with attributes `c_contiguous`, `f_contiguous` and `forc`, - which may be used to query contiguity properties in analogy to - :attr:`numpy.ndarray.flags`. - - .. method :: __len__() - - Returns the size of the leading dimension of *self*. - - .. method :: reshape(shape) - - Returns an array containing the same data with a new shape. - - .. method :: ravel() - - Returns flattened array containing the same data. - - .. method :: view(dtype=None) - - Returns view of array with the same data. If *dtype* is different from - current dtype, the actual bytes of memory will be reinterpreted. - - .. method :: set(ary, queue=None, async=False) - - Transfer the contents the :class:`numpy.ndarray` object *ary* - onto the device. - - *ary* must have the same dtype and size (not necessarily shape) as *self*. - - - .. method :: get(queue=None, ary=None, async=False) - - Transfer the contents of *self* into *ary* or a newly allocated - :mod:`numpy.ndarray`. If *ary* is given, it must have the right - size (not necessarily shape) and dtype. - - .. method :: copy(queue=None) - - .. versionadded:: 2013.1 - - .. method :: __str__() - .. method :: __repr__() - - .. method :: mul_add(self, selffac, other, otherfac, queue=None): - - Return `selffac*self + otherfac*other`. - - .. method :: __add__(other) - .. method :: __sub__(other) - .. method :: __iadd__(other) - .. method :: __isub__(other) - .. method :: __neg__(other) - .. method :: __mul__(other) - .. method :: __div__(other) - .. method :: __rdiv__(other) - .. method :: __pow__(other) - - .. method :: __abs__() - - Return a :class:`Array` containing the absolute value of each - element of *self*. +.. autoclass:: Array + + .. automethod :: __len__ + .. automethod :: reshape + .. automethod :: ravel + .. automethod :: view + .. automethod :: set + .. automethod :: get + .. automethod :: copy + + .. automethod :: __str__ + .. automethod :: __repr__ + + .. automethod :: mul_add + .. automethod :: __add__ + .. automethod :: __sub__ + .. automethod :: __iadd__ + .. automethod :: __isub__ + .. automethod :: __neg__ + .. automethod :: __mul__ + .. automethod :: __div__ + .. automethod :: __rdiv__ + .. automethod :: __pow__ + + .. automethod :: __abs__ .. UNDOC reverse() - .. method :: fill(scalar, queue=None) - - Fill the array with *scalar*. + .. automethod :: fill - .. method :: astype(dtype, queue=None) + .. automethod :: astype - Return *self*, cast to *dtype*. - - .. attribute :: real - - .. versionadded:: 2012.1 - - .. attribute :: imag - - .. versionadded:: 2012.1 - - .. method :: conj() - - .. versionadded:: 2012.1 + .. autoattribute :: real + .. autoattribute :: imag + .. automethod :: conj Constructing :class:`Array` Instances ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. function:: to_device(queue, ary, allocator=None, async=False) - - Return a :class:`Array` that is an exact copy of the :class:`numpy.ndarray` - instance *ary*. - - See :class:`Array` for the meaning of *allocator*. - - .. versionchanged:: 2011.1 - *context* argument was deprecated. - +.. autofunction:: to_device .. function:: empty(queue, shape, dtype, order="C", allocator=None, data=None) A synonym for the :class:`Array` constructor. -.. function:: zeros(queue, shape, dtype, order="C", allocator=None) - - Same as :func:`empty`, but the :class:`Array` is zero-initialized before - being returned. - - .. versionchanged:: 2011.1 - *context* argument was deprecated. - -.. function:: empty_like(other_ary) - - Make a new, uninitialized :class:`Array` having the same properties - as *other_ary*. - -.. function:: zeros_like(other_ary) - - Make a new, zero-initialized :class:`Array` having the same properties - as *other_ary*. - -.. function:: arange(queue, start, stop, step, dtype=None, allocator=None) - - Create a :class:`Array` filled with numbers spaced `step` apart, - starting from `start` and ending at `stop`. - - For floating point arguments, the length of the result is - `ceil((stop - start)/step)`. This rule may result in the last - element of the result being greater than `stop`. - - *dtype*, if not specified, is taken as the largest common type - of *start*, *stop* and *step*. - - .. versionchanged:: 2011.1 - *context* argument was deprecated. - - .. versionchanged:: 2011.2 - *allocator* keyword argument was added. - -.. function:: take(a, indices, out=None, queue=None) - - Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``. - For the moment, *a* must be a type that can be bound to a texture. +.. autofunction:: zeros +.. autofunction:: empty_like +.. autofunction:: zeros_like +.. autofunction:: arange +.. autofunction:: take Conditionals ^^^^^^^^^^^^ -.. function:: if_positive(criterion, then_, else_, out=None, queue=None) - - Return an array like *then_*, which, for the element at index *i*, - contains *then_[i]* if *criterion[i]>0*, else *else_[i]*. - -.. function:: maximum(a, b, out=None, queue=None) - - Return the elementwise maximum of *a* and *b*. - -.. function:: minimum(a, b, out=None, queue=None) - - Return the elementwise minimum of *a* and *b*. +.. autofunction:: if_positive +.. autofunction:: maximum +.. autofunction:: minimum .. _reductions: - Reductions ^^^^^^^^^^ -.. function:: sum(a, dtype=None, queue=None) - - .. versionadded: 2011.1 - -.. function:: dot(a, b, dtype=None, queue=None) - - .. versionadded: 2011.1 - -.. function:: subset_dot(subset, a, b, dtype=None, queue=None) - - .. versionadded: 2011.1 - -.. function:: max(a, queue=None) - - .. versionadded: 2011.1 - -.. function:: min(a, queue=None) - - .. versionadded: 2011.1 - -.. function:: subset_max(subset, a, queue=None) - - .. versionadded: 2011.1 - -.. function:: subset_min(subset, a, queue=None) - - .. versionadded: 2011.1 +.. autofunction:: sum +.. autofunction:: dot +.. autofunction:: subset_dot +.. autofunction:: max +.. autofunction:: min +.. autofunction:: subset_max +.. autofunction:: subset_min See also :ref:`custom-reductions`. @@ -464,93 +292,18 @@ functions available in the OpenCL standard. (See table 6.8 in the spec.) Generating Arrays of Random Numbers ----------------------------------- -.. module:: pyopencl.clrandom - -.. class:: RanluxGenerator(self, queue, num_work_items=None, luxury=2, seed=None, max_work_items=None) - - :param queue: :class:`pyopencl.CommandQueue`, only used for initialization - :param luxury: the "luxury value" of the generator, and should be 0-4, where 0 is fastest - and 4 produces the best numbers. It can also be >=24, in which case it directly - sets the p-value of RANLUXCL. - :param num_work_items: is the number of generators to initialize, usually corresponding - to the number of work-items in the NDRange RANLUXCL will be used with. - May be `None`, in which case a default value is used. - :param max_work_items: should reflect the maximum number of work-items that will be used - on any parallel instance of RANLUXCL. So for instance if we are launching 5120 - work-items on GPU1 and 10240 work-items on GPU2, GPU1's RANLUXCLTab would be - generated by calling ranluxcl_intialization with numWorkitems = 5120 while - GPU2's RANLUXCLTab would use numWorkitems = 10240. However maxWorkitems must - be at least 10240 for both GPU1 and GPU2, and it must be set to the same value - for both. (may be `None`) - - .. versionadded:: 2011.2 - - .. versionchanged:: 2013.1 - Added default value for `num_work_items`. - - .. attribute:: state - - A :class:`pyopencl.array.Array` containing the state of the generator. - - .. attribute:: nskip - - nskip is an integer which can (optionally) be defined in the kernel code - as RANLUXCL_NSKIP. If this is done the generator will be faster for luxury setting - 0 and 1, or when the p-value is manually set to a multiple of 24. - - .. method:: fill_uniform(ary, a=0, b=1, queue=None) - - Fill *ary* with uniformly distributed random numbers in the interval - *(a, b)*, endpoints excluded. - - .. method:: uniform(queue, shape, dtype, order="C", allocator=None, base=None, data=None, a=0, b=1) - - Make a new empty array, apply :meth:`fill_uniform` to it. - - .. method:: fill_normal(ary, mu=0, sigma=1, queue=None): - - Fill *ary* with normally distributed numbers with mean *mu* and - standard deviation *sigma*. - - .. method:: normal(queue, shape, dtype, order="C", allocator=None, base=None, data=None, mu=0, sigma=1) - - Make a new empty array, apply :meth:`fill_normal` to it. - - .. method:: synchronize() - - The generator gets inefficient when different work items invoke - the generator a differing number of times. This function - ensures efficiency. - -.. function:: rand(queue, shape, dtype, a=0, b=1) - - Return an array of `shape` filled with random values of `dtype` - in the range [a,b). - -.. function:: fill_rand(result, queue=None, a=0, b=1) - - Fill *result* with random values of `dtype` in the range [0,1). - -PyOpenCL now includes and uses the `RANLUXCL random number generator -<https://bitbucket.org/ivarun/ranluxcl/>`_ by Ivar Ursin Nikolaisen. In -addition to being usable through the convenience functions above, it is -available in any piece of code compiled through PyOpenCL by:: - - #include <pyopencl-ranluxcl.cl> - -See the `source <https://github.com/inducer/pyopencl/blob/master/src/cl/pyopencl-ranluxcl.cl>`_ -for some documentation if you're planning on using RANLUXCL directly. +.. automodule:: pyopencl.clrandom -The RANLUX generator is described in the following two articles. If you use the -generator for scientific purposes, please consider citing them: + .. autoclass:: RanluxGenerator -* Martin Lüscher, A portable high-quality random number generator for lattice - field theory simulations, `Computer Physics Communications 79 (1994) 100-110 - <http://dx.doi.org/10.1016/0010-4655(94)90232-1>`_ + .. automethod:: fill_uniform + .. automethod:: uniform + .. automethod:: fill_normal + .. automethod:: normal + .. automethod:: synchronize -* F. James, RANLUX: A Fortran implementation of the high-quality pseudorandom - number generator of Lüscher, `Computer Physics Communications 79 (1994) 111-114 - <http://dx.doi.org/10.1016/0010-4655(94)90233-X>`_ + .. autofunction:: rand + .. autofunction:: fill_rand Fast Fourier Transforms ----------------------- diff --git a/doc/source/conf.py b/doc/source/conf.py index 1554d69588b75de3d8e8fbe6786c5eac1e57560b..ccfb977bd06ec91f8817395c80fd6de3adeab203 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -31,6 +31,8 @@ extensions = [ # Add any paths that contain templates here, relative to this directory. templates_path = ['.templates'] +exclude_patterns = ['subst.rst'] + # The suffix of source filenames. source_suffix = '.rst' diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 9c33792a4636b481b38920e8c46e59205ce54c5a..e38b4fda424b7b91d8227dccc06230a3ff2a5f06 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -1136,7 +1136,7 @@ class KeyValueSorter(object): values, keys, queue=queue, wait_for=wait_for) starts = cl.array.empty(queue, (nkeys+1), starts_dtype, allocator=allocator) - evt = starts.fill_and_return_event(len(values_sorted_by_key), wait_for=[evt]) + evt = starts.fill_evt(len(values_sorted_by_key), wait_for=[evt]) evt = knl_info.start_finder(starts, keys_sorted_by_key, range=slice(len(keys_sorted_by_key)), diff --git a/pyopencl/array.py b/pyopencl/array.py index 7d6999054a21ebe4520a826fa1647783c76056c2..66f4508e59d4084df502c996edf1d99dd17d5c71 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -205,11 +205,58 @@ class DefaultAllocator(cl.tools.DeferredAllocator): # {{{ array class class Array(object): - """A :mod:`pyopencl` Array is used to do array-based calculation on - a compute device. + """A :class:`numpy.ndarray` work-alike that stores its data and performs its + computations on the compute device. *shape* and *dtype* work exactly as in + :mod:`numpy`. Arithmetic methods in :class:`Array` support the + broadcasting of scalars. (e.g. `array+5`) - This is mostly supposed to be a :mod:`numpy`-workalike. Operators - work on an element-by-element basis, just like :class:`numpy.ndarray`. + *cqa* must be a :class:`pyopencl.CommandQueue`. *cqa* + specifies the queue in which the array carries out its + computations by default. *cqa* will at some point be renamed *queue*, + so it should be considered 'positional-only'. + + *allocator* may be `None` or a callable that, upon being called with an + argument of the number of bytes to be allocated, returns an + :class:`pyopencl.Buffer` object. (A :class:`pyopencl.tools.MemoryPool` + instance is one useful example of an object to pass here.) + + .. versionchanged:: 2011.1 + Renamed *context* to *cqa*, made it general-purpose. + + All arguments beyond *order* should be considered keyword-only. + + .. attribute :: data + + The :class:`pyopencl.MemoryObject` instance created for the memory that backs + this :class:`Array`. + + .. attribute :: shape + + The tuple of lengths of each dimension in the array. + + .. attribute :: dtype + + The :class:`numpy.dtype` of the items in the GPU array. + + .. attribute :: size + + The number of meaningful entries in the array. Can also be computed by + multiplying up the numbers in :attr:`shape`. + + .. attribute :: nbytes + + The size of the entire array in bytes. Computed as :attr:`size` times + ``dtype.itemsize``. + + .. attribute :: strides + + Tuple of bytes to step in each dimension when traversing an array. + + .. attribute :: flags + + Return an object with attributes `c_contiguous`, `f_contiguous` and `forc`, + which may be used to query contiguity properties in analogy to + :attr:`numpy.ndarray.flags`. """ def __init__(self, cqa, shape, dtype, order="C", allocator=None, @@ -375,6 +422,12 @@ class Array(object): kernel_specific_max_wg_size=kernel_specific_max_wg_size) def set(self, ary, queue=None, async=False): + """Transfer the contents the :class:`numpy.ndarray` object *ary* + onto the device. + + *ary* must have the same dtype and size (not necessarily shape) as *self*. + """ + assert ary.size == self.size assert ary.dtype == self.dtype @@ -394,6 +447,11 @@ class Array(object): is_blocking=not async) def get(self, queue=None, ary=None, async=False): + """Transfer the contents of *self* into *ary* or a newly allocated + :mod:`numpy.ndarray`. If *ary* is given, it must have the right + size (not necessarily shape) and dtype. + """ + if ary is None: ary = np.empty(self.shape, self.dtype) @@ -413,6 +471,8 @@ class Array(object): return ary def copy(self, queue=None): + """.. versionadded:: 2013.1""" + queue = queue or self.queue result = self._new_like_me() cl.enqueue_copy(queue, result.data, self.data, byte_count=self.nbytes) @@ -427,7 +487,8 @@ class Array(object): def __hash__(self): raise TypeError("pyopencl arrays are not hashable.") - # kernel invocation wrappers ---------------------------------------------- + # {{{ kernel invocation wrappers + @staticmethod @elwise_kernel_runner def _axpbyz(out, afac, a, bfac, b, queue=None): @@ -568,7 +629,10 @@ class Array(object): return self.__class__(self.context, self.shape, dtype, strides=strides) - # operators --------------------------------------------------------------- + # }}} + + # {{{ operators + def mul_add(self, selffac, other, otherfac, queue=None): """Return `selffac * self + otherfac*other`. """ @@ -678,9 +742,7 @@ class Array(object): return self def __div__(self, other): - """Divides an array by an array or a scalar:: - - x = self / n + """Divides an array by an array or a scalar, i.e. ``self / other``. """ if isinstance(other, Array): result = self._new_like_me(_get_common_dtype(self, other, self.queue)) @@ -700,9 +762,7 @@ class Array(object): __truediv__ = __div__ def __rdiv__(self,other): - """Divides an array by a scalar or an array:: - - x = n / self + """Divides an array by a scalar or an array, i.e. ``other / self``. """ if isinstance(other, Array): @@ -718,17 +778,23 @@ class Array(object): __rtruediv__ = __rdiv__ - def fill(self, value, queue=None, wait_for=None, return_event=False): - """Fills the array with the specified value.""" + def fill(self, value, queue=None, wait_for=None): + """Fill the array with *scalar*. + + :returns: *self*. + """ self._fill(self, value, queue=queue, wait_for=wait_for) return self - def fill_and_return_event(self, value, queue=None, wait_for=None, return_event=False): - """Fills the array with the specified value.""" + def fill_evt(self, value, queue=None, wait_for=None): + """Fills the array with the specified value. + + :returns: A :class:`pyopencl.Event`. + """ return self._fill(self, value, queue=queue, wait_for=wait_for) def __len__(self): - """Return the size of the leading dimension of self.""" + """Returns the size of the leading dimension of *self*.""" if len(self.shape): return self.shape[0] else: @@ -736,7 +802,7 @@ class Array(object): def __abs__(self): """Return a `Array` of the absolute values of the elements - of `self`. + of *self*. """ result = self._new_like_me(self.dtype.type(0).real.dtype) @@ -766,6 +832,8 @@ class Array(object): self._rpow_scalar(result, common_dtype.type(other), self) return result + # }}} + def reverse(self, queue=None): """Return this array in reversed order. The array is treated as one-dimensional. @@ -776,6 +844,7 @@ class Array(object): return result def astype(self, dtype, queue=None): + """Return *self*, cast to *dtype*.""" if dtype == self.dtype: return self @@ -783,7 +852,8 @@ class Array(object): self._copy(result, self, queue=queue) return result - # rich comparisons (or rather, lack thereof) ------------------------------ + # {{{ rich comparisons (or rather, lack thereof) + def __eq__(self, other): raise NotImplementedError @@ -802,9 +872,10 @@ class Array(object): def __gt__(self, other): raise NotImplementedError + # }}} + # {{{ complex-valued business - @property def real(self): if self.dtype.kind == "c": result = self._new_like_me(self.dtype.type(0).real.dtype) @@ -812,8 +883,8 @@ class Array(object): return result else: return self + real = property(real, doc=".. versionadded:: 2012.1") - @property def imag(self): if self.dtype.kind == "c": result = self._new_like_me(self.dtype.type(0).real.dtype) @@ -821,8 +892,10 @@ class Array(object): return result else: return zeros_like(self) + imag = property(imag, doc=".. versionadded:: 2012.1") def conj(self): + """.. versionadded:: 2012.1""" if self.dtype.kind == "c": result = self._new_like_me() self._conj(result, self) @@ -835,6 +908,7 @@ class Array(object): # {{{ views def reshape(self, *shape): + """Returns an array containing the same data with a new shape.""" # TODO: add more error-checking, perhaps if isinstance(shape[0], tuple) or isinstance(shape[0], list): shape = tuple(shape[0]) @@ -845,9 +919,14 @@ class Array(object): return self._new_with_changes(data=self.data, shape=shape) def ravel(self): + """Returns flattened array containing the same data.""" return self.reshape(self.size) def view(self, dtype=None): + """Returns view of array with the same data. If *dtype* is different from + current dtype, the actual bytes of memory will be reinterpreted. + """ + if dtype is None: dtype = self.dtype @@ -887,7 +966,14 @@ def as_strided(ary, shape=None, strides=None): # {{{ creation helpers def to_device(queue, ary, allocator=None, async=False): - """Converts a numpy array to a :class:`Array`.""" + """Return a :class:`Array` that is an exact copy of the :class:`numpy.ndarray` + instance *ary*. + + See :class:`Array` for the meaning of *allocator*. + + .. versionchanged:: 2011.1 + *context* argument was deprecated. + """ if ary.dtype == object: raise RuntimeError("to_device does not work on object arrays.") @@ -903,7 +989,12 @@ def to_device(queue, ary, allocator=None, async=False): empty = Array def zeros(queue, shape, dtype, order="C", allocator=None): - """Returns an array of the given shape and dtype filled with 0's.""" + """Same as :func:`empty`, but the :class:`Array` is zero-initialized before + being returned. + + .. versionchanged:: 2011.1 + *context* argument was deprecated. + """ result = Array(queue, shape, dtype, order=order, allocator=allocator) @@ -912,9 +1003,17 @@ def zeros(queue, shape, dtype, order="C", allocator=None): return result def empty_like(ary): + """Make a new, uninitialized :class:`Array` having the same properties + as *other_ary*. + """ + return ary._new_with_changes(data=None) def zeros_like(ary): + """Make a new, zero-initialized :class:`Array` having the same properties + as *other_ary*. + """ + result = empty_like(ary) zero = np.zeros((), ary.dtype) result.fill(zero) @@ -927,12 +1026,21 @@ def _arange_knl(result, start, step): result.context, result.dtype) def arange(queue, *args, **kwargs): - """Create an array filled with numbers spaced `step` apart, + """Create a :class:`Array` filled with numbers spaced `step` apart, starting from `start` and ending at `stop`. For floating point arguments, the length of the result is `ceil((stop - start)/step)`. This rule may result in the last - element of the result being greater than stop. + element of the result being greater than `stop`. + + *dtype*, if not specified, is taken as the largest common type + of *start*, *stop* and *step*. + + .. versionchanged:: 2011.1 + *context* argument was deprecated. + + .. versionchanged:: 2011.2 + *allocator* keyword argument was added. """ # argument processing ----------------------------------------------------- @@ -1019,6 +1127,10 @@ def _take(result, ary, indices): def take(a, indices, out=None, queue=None): + """Return the :class:`Array` ``[a[indices[0]], ..., a[indices[n]]]``. + For the moment, *a* must be a type that can be bound to a texture. + """ + queue = queue or a.queue if out is None: out = Array(queue, indices.shape, a.dtype, allocator=a.allocator) @@ -1224,33 +1336,30 @@ def _if_positive(result, criterion, then_, else_): def if_positive(criterion, then_, else_, out=None, queue=None): + """Return an array like *then_*, which, for the element at index *i*, + contains *then_[i]* if *criterion[i]>0*, else *else_[i]*. + """ + if not (criterion.shape == then_.shape == else_.shape): raise ValueError("shapes do not match") if not (then_.dtype == else_.dtype): raise ValueError("dtypes do not match") - knl = elementwise.get_if_positive_kernel( - criterion.context, - criterion.dtype, then_.dtype) - if out is None: out = empty_like(then_) _if_positive(out, criterion, then_, else_) return out - - - def maximum(a, b, out=None, queue=None): + """Return the elementwise maximum of *a* and *b*.""" + # silly, but functional return if_positive(a.mul_add(1, b, -1, queue=queue), a, b, queue=queue, out=out) - - - def minimum(a, b, out=None, queue=None): + """Return the elementwise minimum of *a* and *b*.""" # silly, but functional return if_positive(a.mul_add(1, b, -1, queue=queue), b, a, queue=queue, out=out) @@ -1262,16 +1371,25 @@ _builtin_min = min _builtin_max = max def sum(a, dtype=None, queue=None): + """ + .. versionadded:: 2011.1 + """ from pyopencl.reduction import get_sum_kernel krnl = get_sum_kernel(a.context, dtype, a.dtype) return krnl(a, queue=queue) def dot(a, b, dtype=None, queue=None): + """ + .. versionadded:: 2011.1 + """ from pyopencl.reduction import get_dot_kernel krnl = get_dot_kernel(a.context, dtype, a.dtype, b.dtype) return krnl(a, b, queue=queue) def subset_dot(subset, a, b, dtype=None, queue=None): + """ + .. versionadded:: 2011.1 + """ from pyopencl.reduction import get_subset_dot_kernel krnl = get_subset_dot_kernel(a.context, dtype, subset.dtype, a.dtype, b.dtype) return krnl(subset, a, b, queue=queue) @@ -1285,7 +1403,14 @@ def _make_minmax_kernel(what): return f min = _make_minmax_kernel("min") +min.__doc__ = """ + .. versionadded:: 2011.1 + """ + max = _make_minmax_kernel("max") +max.__doc__ = """ + .. versionadded:: 2011.1 + """ def _make_subset_minmax_kernel(what): def f(subset, a, queue=None): @@ -1296,7 +1421,9 @@ def _make_subset_minmax_kernel(what): return f subset_min = _make_subset_minmax_kernel("min") +subset_min.__doc__ = """.. versionadded:: 2011.1""" subset_max = _make_subset_minmax_kernel("max") +subset_max.__doc__ = """.. versionadded:: 2011.1""" # }}} diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index 5db53e671b4219e5cc7f89cc44e0de2a74a5a8e2..43baca7ae941276fec622d9dac57380a7e1ccce5 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -1,3 +1,4 @@ +# encoding: utf8 from __future__ import division __copyright__ = "Copyright (C) 2009 Andreas Kloeckner" @@ -22,6 +23,33 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ +# {{{ documentation + +__doc__ = """ +PyOpenCL now includes and uses the `RANLUXCL random number generator +<https://bitbucket.org/ivarun/ranluxcl/>`_ by Ivar Ursin Nikolaisen. In +addition to being usable through the convenience functions above, it is +available in any piece of code compiled through PyOpenCL by:: + + #include <pyopencl-ranluxcl.cl> + +See the `source <https://github.com/inducer/pyopencl/blob/master/src/cl/pyopencl-ranluxcl.cl>`_ +for some documentation if you're planning on using RANLUXCL directly. + +The RANLUX generator is described in the following two articles. If you use the +generator for scientific purposes, please consider citing them: + +* Martin Lüscher, A portable high-quality random number generator for lattice + field theory simulations, `Computer Physics Communications 79 (1994) 100-110 + <http://dx.doi.org/10.1016/0010-4655(94)90232-1>`_ + +* F. James, RANLUX: A Fortran implementation of the high-quality pseudorandom + number generator of Lüscher, `Computer Physics Communications 79 (1994) 111-114 + <http://dx.doi.org/10.1016/0010-4655(94)90233-X>`_ +""" + +# }}} + import pyopencl as cl import pyopencl.array as cl_array from pyopencl.tools import first_arg_dependent_memoize @@ -32,10 +60,47 @@ import numpy as np + + + class RanluxGenerator(object): + """ + .. versionadded:: 2011.2 + + .. attribute:: state + + A :class:`pyopencl.array.Array` containing the state of the generator. + + .. attribute:: nskip + + nskip is an integer which can (optionally) be defined in the kernel code + as RANLUXCL_NSKIP. If this is done the generator will be faster for luxury setting + 0 and 1, or when the p-value is manually set to a multiple of 24. + """ + def __init__(self, queue, num_work_items=None, luxury=None, seed=None, no_warmup=False, use_legacy_init=False, max_work_items=None): + """ + :param queue: :class:`pyopencl.CommandQueue`, only used for initialization + :param luxury: the "luxury value" of the generator, and should be 0-4, where 0 is fastest + and 4 produces the best numbers. It can also be >=24, in which case it directly + sets the p-value of RANLUXCL. + :param num_work_items: is the number of generators to initialize, usually corresponding + to the number of work-items in the NDRange RANLUXCL will be used with. + May be `None`, in which case a default value is used. + :param max_work_items: should reflect the maximum number of work-items that will be used + on any parallel instance of RANLUXCL. So for instance if we are launching 5120 + work-items on GPU1 and 10240 work-items on GPU2, GPU1's RANLUXCLTab would be + generated by calling ranluxcl_intialization with numWorkitems = 5120 while + GPU2's RANLUXCLTab would use numWorkitems = 10240. However maxWorkitems must + be at least 10240 for both GPU1 and GPU2, and it must be set to the same value + for both. (may be `None`) + + .. versionchanged:: 2013.1 + Added default value for `num_work_items`. + """ + if luxury is None: luxury = 4 @@ -215,6 +280,10 @@ class RanluxGenerator(object): return knl, size_multiplier def fill_uniform(self, ary, a=0, b=1, queue=None): + """Fill *ary* with uniformly distributed random numbers in the interval + *(a, b)*, endpoints excluded. + """ + if queue is None: queue = ary.queue @@ -225,6 +294,8 @@ class RanluxGenerator(object): b-a, a) def uniform(self, *args, **kwargs): + """Make a new empty array, apply :meth:`fill_uniform` to it. + """ a = kwargs.pop("a", 0) b = kwargs.pop("b", 1) @@ -234,6 +305,10 @@ class RanluxGenerator(object): return result def fill_normal(self, ary, mu=0, sigma=1, queue=None): + """Fill *ary* with normally distributed numbers with mean *mu* and + standard deviation *sigma*. + """ + if queue is None: queue = ary.queue @@ -243,6 +318,8 @@ class RanluxGenerator(object): self.state.data, ary.data, ary.size*size_multiplier, sigma, mu) def normal(self, *args, **kwargs): + """Make a new empty array, apply :meth:`fill_normal` to it. + """ mu = kwargs.pop("mu", 0) sigma = kwargs.pop("sigma", 1) @@ -273,6 +350,11 @@ class RanluxGenerator(object): return prg.sync def synchronize(self, queue): + """The generator gets inefficient when different work items invoke the + generator a differing number of times. This function ensures + efficiency. + """ + self.get_sync_kernel()(queue, (self.num_work_items,), self.wg_size, self.state.data) @@ -290,6 +372,8 @@ def _get_generator(queue, luxury=None): def fill_rand(result, queue=None, luxury=4, a=0, b=1): + """Fill *result* with random values of `dtype` in the range [0,1). + """ if queue is None: queue = result.queue gen = _get_generator(queue, luxury=luxury) @@ -299,6 +383,10 @@ def fill_rand(result, queue=None, luxury=4, a=0, b=1): def rand(queue, shape, dtype, luxury=None, a=0, b=1): + """Return an array of `shape` filled with random values of `dtype` + in the range [a,b). + """ + from pyopencl.array import Array gen = _get_generator(queue, luxury) result = Array(queue, shape, dtype) @@ -307,4 +395,4 @@ def rand(queue, shape, dtype, luxury=None, a=0, b=1): -# vim: filetype=pyopencl +# vim: filetype=pyopencl:foldmethod=marker