diff --git a/doc/source/array.rst b/doc/source/array.rst
index d9d7ccf75eac37a7904dfa8ce982320fc565c424..e3802292ce38630db05e606c3d0df02f01379fdb 100644
--- a/doc/source/array.rst
+++ b/doc/source/array.rst
@@ -125,6 +125,7 @@ Constructing :class:`Array` Instances
 .. autofunction:: zeros_like
 .. autofunction:: arange
 .. autofunction:: take
+.. autofunction:: concatenate
 
 Conditionals
 ^^^^^^^^^^^^
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 4f46d9a166ce69086f4cf199e1eeb6117f6432cf..8b10c70cbb70453e61b8a1fb6c8a2c7df948f82a 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -167,8 +167,9 @@ htmlhelp_basename = 'PyCudadoc'
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, document class [howto/manual]).
 latex_documents = [
-  ('index', 'pyopencl.tex', 'PyOpenCL Documentation', 'Andreas Kloeckner', 'manual'),
-]
+        ('index', 'pyopencl.tex', 'PyOpenCL Documentation',
+            'Andreas Kloeckner', 'manual'),
+        ]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 78a02f05da0db9ec5281cde04e25bb5c297bec51..6d9c7f198107acc63dcd55b856f0d44ea2215bcf 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -105,6 +105,7 @@ Version 2013.1
   arrays will fail for now.  This will be fixed in a future release.
 * :class:`pyopencl.CommandQueue` may be used as a context manager (in a ``with`` statement)
 * Add :func:`pyopencl.clmath.atan2`, :func:`pyopencl.clmath.atan2pi`.
+* Add :func:`pyopencl.array.concatenate`.
 
 Version 2012.1
 --------------
diff --git a/pyopencl/array.py b/pyopencl/array.py
index a48029c36f37207fb4ec27ce9770e26f69b8621b..9833559de02cf3e3db37d903ffb484d520e7e8e1 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -519,9 +519,15 @@ class Array(object):
                     events=self.events)
 
     def with_queue(self, queue):
-        """Return a copy of *self* with the default queue set to *queue*."""
+        """Return a copy of *self* with the default queue set to *queue*.
 
-        assert queue.context == self.context
+        *queue* may be *None*; the context check is then skipped.
+
+        .. versionadded:: 2013.1
+        """
+        # A *queue* of None has no context to compare against.
+        if queue is not None:
+            assert queue.context == self.context
         return self._new_with_changes(self.base_data, self.offset,
                 queue=queue)
 
@@ -1165,32 +1171,19 @@ class Array(object):
                 shape=tuple(new_shape),
                 strides=tuple(new_strides))
 
-    def __setitem__(self, subscript, value):
-        """Set the slice of *self* identified *subscript* to *value*.
-
-        *value* is allowed to be:
-
-        * A :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
-          but with potentially different :attr:`dtype`.
-        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
-          :attr:`strides`, but with potentially different :attr:`dtype`.
-        * A scalar.
-
-        Non-scalar broadcasting is not currently supported.
-
-        .. versionadded:: 2013.1
-        """
+    def _setitem(self, subscript, value, queue=None):
+        queue = queue or self.queue or value.queue
 
         subarray = self[subscript]
 
         if isinstance(value, np.ndarray):
             if subarray.shape == value.shape and subarray.strides == value.strides:
                 self.events.append(
-                        cl.enqueue_copy(self.queue, subarray.base_data,
+                        cl.enqueue_copy(queue, subarray.base_data,
                             value, device_offset=subarray.offset))
                 return
             else:
-                value = to_device(self.queue, value, self.allocator)
+                value = to_device(queue, value, self.allocator)
 
         if isinstance(value, Array):
             if len(subarray.shape) != len(value.shape):
@@ -1203,11 +1196,28 @@ class Array(object):
                 raise ValueError("cannot assign between arrays of "
                         "differing strides")
 
-            self._copy(subarray, value)
+            self._copy(subarray, value, queue=queue)
 
         else:
             # Let's assume it's a scalar
-            subarray.fill(value)
+            subarray.fill(value, queue=queue)
+
+    def __setitem__(self, subscript, value):
+        """Set the slice of *self* identified by *subscript* to *value*.
+
+        *value* is allowed to be:
+
+        * An :class:`Array` of the same :attr:`shape` and (for now)
+          :attr:`strides`, but with potentially different :attr:`dtype`.
+        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
+          :attr:`strides`, but with potentially different :attr:`dtype`.
+        * A scalar.
+
+        Non-scalar broadcasting is not currently supported.
+
+        .. versionadded:: 2013.1
+        """
+        self._setitem(subscript, value)
 
 # }}}
 
@@ -1605,6 +1615,82 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None):
 
     return out
 
+
+def concatenate(arrays, axis=0, queue=None, allocator=None):
+    """Return a new :class:`Array` joining *arrays* along *axis*.
+
+    *arrays* is a sequence of :class:`Array` instances whose shapes agree
+    in every axis except *axis*. A negative *axis* counts from the last
+    axis. *queue* and *allocator* default to those of the first array
+    in *arrays* that has one. The dtype of the result is the common type
+    of the input dtypes, as found by :func:`numpy.find_common_type`.
+
+    Raises :exc:`ValueError` if *arrays* is empty or if the shapes do
+    not fit together.
+
+    .. note::
+
+        The pieces are copied by slice assignment, which (for now)
+        requires matching strides, so joining along an axis other than
+        the first may fail.
+
+    .. versionadded:: 2013.1
+    """
+
+    # {{{ find properties of result array
+
+    shape = None
+
+    for i_ary, ary in enumerate(arrays):
+        queue = queue or ary.queue
+        allocator = allocator or ary.allocator
+
+        if shape is None:
+            # first array
+            shape = list(ary.shape)
+
+            # normalize so the slicing below also works for negative axes
+            if axis < 0:
+                axis += len(shape)
+        else:
+            if len(ary.shape) != len(shape):
+                raise ValueError("%d'th array has different number of axes "
+                        "(should have %d, has %d)"
+                        % (i_ary, len(shape), len(ary.shape)))
+
+            ary_shape_list = list(ary.shape)
+            if (ary_shape_list[:axis] != shape[:axis]
+                    or ary_shape_list[axis+1:] != shape[axis+1:]):
+                raise ValueError("%d'th array has residual not matching "
+                        "other arrays" % i_ary)
+
+            shape[axis] += ary.shape[axis]
+
+    if shape is None:
+        raise ValueError("need at least one array to concatenate")
+
+    # }}}
+
+    shape = tuple(shape)
+    dtype = np.find_common_type([ary.dtype for ary in arrays], [])
+    result = empty(queue, shape, dtype, allocator=allocator)
+
+    full_slice = (slice(None),) * len(shape)
+
+    base_idx = 0
+    for ary in arrays:
+        my_len = ary.shape[axis]
+        # pass the queue explicitly rather than relying on result.queue
+        result._setitem(
+                full_slice[:axis]
+                + (slice(base_idx, base_idx+my_len),)
+                + full_slice[axis+1:],
+                ary, queue=queue)
+
+        base_idx += my_len
+
+    return result
+
 # }}}
 
 
diff --git a/test/test_array.py b/test/test_array.py
index eb72bf4c901b95706ac106d2675eb960a6b86d9c..a2f4739175d80a49fb34d17db2cd03ee28f114b7 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -582,6 +582,26 @@ def test_slice(ctx_factory):
         assert la.norm(a_gpu.get() - a) == 0
 
 
+@pytools.test.mark_test.opencl
+def test_concatenate(ctx_factory):
+    """Compare device-side concatenation with :func:`numpy.concatenate`."""
+    from pyopencl.clrandom import rand as clrand
+
+    queue = cl.CommandQueue(ctx_factory())
+
+    # three random blocks that differ only in the length of the leading axis
+    dev_parts = [
+            clrand(queue, (n_rows, 15, 20), dtype=np.float32)
+            for n_rows in (5, 4, 3)]
+    host_parts = [part.get() for part in dev_parts]
+
+    expected = np.concatenate(host_parts)
+    actual = cl.array.concatenate(dev_parts).get()
+
+    # plain copies, so the result must match exactly
+    assert la.norm(expected - actual) == 0
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the
     # tests.