diff --git a/doc/source/array.rst b/doc/source/array.rst
index d9d7ccf75eac37a7904dfa8ce982320fc565c424..e3802292ce38630db05e606c3d0df02f01379fdb 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -125,6 +125,7 @@ Constructing :class:`Array` Instances
 .. autofunction:: zeros_like
 .. autofunction:: arange
 .. autofunction:: take
+.. autofunction:: concatenate
 
 Conditionals
 ^^^^^^^^^^^^
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 4f46d9a166ce69086f4cf199e1eeb6117f6432cf..8b10c70cbb70453e61b8a1fb6c8a2c7df948f82a 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -167,8 +167,9 @@ htmlhelp_basename = 'PyCudadoc'
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, document class [howto/manual]).
 latex_documents = [
-  ('index', 'pyopencl.tex', 'PyOpenCL Documentation', 'Andreas Kloeckner', 'manual'),
-]
+    ('index', 'pyopencl.tex', 'PyOpenCL Documentation',
+        'Andreas Kloeckner', 'manual'),
+    ]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 78a02f05da0db9ec5281cde04e25bb5c297bec51..6d9c7f198107acc63dcd55b856f0d44ea2215bcf 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -105,6 +105,7 @@ Version 2013.1
   arrays will fail for now. This will be fixed in a future release.
 * :class:`pyopencl.CommandQueue` may be used as a context manager (in a ``with`` statement)
 * Add :func:`pyopencl.clmath.atan2`, :func:`pyopencl.clmath.atan2pi`.
+* Add :func:`pyopencl.array.concatenate`.
 
 Version 2012.1
 --------------
diff --git a/pyopencl/array.py b/pyopencl/array.py
index a48029c36f37207fb4ec27ce9770e26f69b8621b..9833559de02cf3e3db37d903ffb484d520e7e8e1 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -519,9 +519,15 @@ class Array(object):
                 events=self.events)
 
     def with_queue(self, queue):
-        """Return a copy of *self* with the default queue set to *queue*."""
+        """Return a copy of *self* with the default queue set to *queue*.
 
-        assert queue.context == self.context
+        *None* is allowed as a value for *queue*.
+
+        .. versionadded:: 2013.1
+        """
+
+        if queue is not None:
+            assert queue.context == self.context
         return self._new_with_changes(self.base_data, self.offset,
                 queue=queue)
 
@@ -1165,32 +1171,19 @@ class Array(object):
                 shape=tuple(new_shape),
                 strides=tuple(new_strides))
 
-    def __setitem__(self, subscript, value):
-        """Set the slice of *self* identified *subscript* to *value*.
-
-        *value* is allowed to be:
-
-        * A :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
-          but with potentially different :attr:`dtype`.
-        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
-          :attr:`strides`, but with potentially different :attr:`dtype`.
-        * A scalar.
-
-        Non-scalar broadcasting is not currently supported.
-
-        .. versionadded:: 2013.1
-        """
+    def _setitem(self, subscript, value, queue=None):
+        queue = queue or self.queue or getattr(value, "queue", None)
 
         subarray = self[subscript]
 
         if isinstance(value, np.ndarray):
             if subarray.shape == value.shape and subarray.strides == value.strides:
                 self.events.append(
-                        cl.enqueue_copy(self.queue, subarray.base_data,
+                        cl.enqueue_copy(queue, subarray.base_data,
                             value, device_offset=subarray.offset))
                 return
             else:
-                value = to_device(self.queue, value, self.allocator)
+                value = to_device(queue, value, self.allocator)
 
         if isinstance(value, Array):
             if len(subarray.shape) != len(value.shape):
@@ -1203,11 +1196,28 @@ class Array(object):
                 raise ValueError("cannot assign between arrays of "
                         "differing strides")
 
-            self._copy(subarray, value)
+            self._copy(subarray, value, queue=queue)
 
         else:
             # Let's assume it's a scalar
-            subarray.fill(value)
+            subarray.fill(value, queue=queue)
+
+    def __setitem__(self, subscript, value):
+        """Set the slice of *self* identified *subscript* to *value*.
+
+        *value* is allowed to be:
+
+        * A :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
+          but with potentially different :attr:`dtype`.
+        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
+          :attr:`strides`, but with potentially different :attr:`dtype`.
+        * A scalar.
+
+        Non-scalar broadcasting is not currently supported.
+
+        .. versionadded:: 2013.1
+        """
+        self._setitem(subscript, value)
 
     # }}}
 
@@ -1605,6 +1615,67 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None):
 
     return out
 
+
+def concatenate(arrays, axis=0, queue=None, allocator=None):
+    """Return a new :class:`Array` joining *arrays* along *axis*.
+
+    All *arrays* must have the same number of axes and agree in every
+    axis length other than *axis*. *queue* and *allocator* default to
+    those of the first array that supplies one. The result uses the
+    common dtype of all *arrays*.
+
+    .. versionadded:: 2013.1
+    """
+
+    # {{{ find properties of result array
+
+    shape = None
+
+    for i_ary, ary in enumerate(arrays):
+        queue = queue or ary.queue
+        allocator = allocator or ary.allocator
+
+        if shape is None:
+            # first array
+            shape = list(ary.shape)
+        else:
+            if len(ary.shape) != len(shape):
+                raise ValueError("%d'th array has different number of axes "
+                        "(should have %d, has %d)"
+                        % (i_ary, len(shape), len(ary.shape)))
+
+            ary_shape_list = list(ary.shape)
+            if (ary_shape_list[:axis] != shape[:axis]
+                    or ary_shape_list[axis+1:] != shape[axis+1:]):
+                raise ValueError("%d'th array has residual not matching "
+                        "other arrays" % i_ary)
+
+            shape[axis] += ary.shape[axis]
+
+    if shape is None:
+        raise ValueError("need at least one array to concatenate")
+
+    # }}}
+
+    shape = tuple(shape)
+    dtype = np.find_common_type([ary.dtype for ary in arrays], [])
+    result = empty(queue, shape, dtype, allocator=allocator)
+
+    full_slice = (slice(None),) * len(shape)
+
+    base_idx = 0
+    for ary in arrays:
+        my_len = ary.shape[axis]
+        result._setitem(
+                full_slice[:axis]
+                + (slice(base_idx, base_idx+my_len),)
+                + full_slice[axis+1:],
+                ary)
+
+        base_idx += my_len
+
+    return result
+
 # }}}
 
 
diff --git a/test/test_array.py b/test/test_array.py
index eb72bf4c901b95706ac106d2675eb960a6b86d9c..a2f4739175d80a49fb34d17db2cd03ee28f114b7 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -582,6 +582,26 @@ def test_slice(ctx_factory):
     assert la.norm(a_gpu.get() - a) == 0
 
 
+@pytools.test.mark_test.opencl
+def test_concatenate(ctx_factory):
+    context = ctx_factory()
+    queue = cl.CommandQueue(context)
+
+    from pyopencl.clrandom import rand as clrand
+
+    a_dev = clrand(queue, (5, 15, 20), dtype=np.float32)
+    b_dev = clrand(queue, (4, 15, 20), dtype=np.float32)
+    c_dev = clrand(queue, (3, 15, 20), dtype=np.float32)
+    a = a_dev.get()
+    b = b_dev.get()
+    c = c_dev.get()
+
+    cat_dev = cl.array.concatenate((a_dev, b_dev, c_dev))
+    cat = np.concatenate((a, b, c))
+
+    assert la.norm(cat - cat_dev.get()) == 0
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the
     # tests.