diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index ba86c530f9bf435cb30664f062c6f40ef5b429c4..e05694ba2a9e32bb0fe7801ca1cf967efaf44813 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -99,9 +99,10 @@ Version 2013.1
   See :attr:`pyopencl.array.Array.base_data` and :attr:`pyopencl.array.Array.offset`.
   Note that not all functions in PyOpenCL support such arrays just yet. These
   will fail with :exc:`pyopencl.array.ArrayHasOffsetError`.
-* Add :meth:`pyopencl.array.Array.__getitem__`, supporting generic slicing.
-  Note that many (most) operations on sliced arrays will fail for now.
-  This will be fixed in a future release.
+* Add :meth:`pyopencl.array.Array.__getitem__` and :meth:`pyopencl.array.Array.__setitem__`,
+  supporting generic slicing.
+  Note that some operations (specifically, reductions and scans) on sliced
+  arrays will fail for now.  This will be fixed in a future release.
 * :class:`pyopencl.CommandQueue` may be used as a context manager (in a ``with`` statement)
 
 Version 2012.1
diff --git a/pyopencl/array.py b/pyopencl/array.py
index c6e364f58519b7ba2cfea5f1aa27df1510119b7f..a48029c36f37207fb4ec27ce9770e26f69b8621b 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -348,6 +348,7 @@ class Array(object):
     .. automethod :: conj
 
     .. automethod :: __getitem__
+    .. automethod :: __setitem__
 
     """
 
@@ -1086,6 +1087,9 @@ class Array(object):
         del self.events[:]
 
     def __getitem__(self, index):
+        """
+        .. versionadded:: 2013.1
+        """
         if not isinstance(index, tuple):
             index = (index,)
 
@@ -1161,6 +1165,50 @@ class Array(object):
                 shape=tuple(new_shape),
                 strides=tuple(new_strides))
 
+    def __setitem__(self, subscript, value):
+        """Set the slice of *self* identified by *subscript* to *value*.
+
+        *value* is allowed to be:
+
+        * An :class:`Array` of the same :attr:`shape` and (for now) :attr:`strides`,
+          but with potentially different :attr:`dtype`.
+        * A :class:`numpy.ndarray` of the same :attr:`shape` and (for now)
+          :attr:`strides`, but with potentially different :attr:`dtype`.
+        * A scalar.
+
+        Non-scalar broadcasting is not currently supported: a rank mismatch raises
+        :exc:`NotImplementedError`; differing shapes or strides, :exc:`ValueError`.
+
+        .. versionadded:: 2013.1
+        """
+        subarray = self[subscript]
+
+        if isinstance(value, np.ndarray):
+            # A direct host-to-device copy moves raw bytes, so it is only valid
+            # when the dtype matches too; otherwise upload and use the Array path.
+            if (subarray.dtype, subarray.shape, subarray.strides) == (
+                    value.dtype, value.shape, value.strides):
+                self.events.append(cl.enqueue_copy(self.queue,
+                    subarray.base_data, value, device_offset=subarray.offset))
+                return
+            value = to_device(self.queue, value, self.allocator)
+
+        if isinstance(value, Array):
+            if len(subarray.shape) != len(value.shape):
+                raise NotImplementedError("broadcasting is not "
+                        "supported in __setitem__")
+            if subarray.shape != value.shape:
+                raise ValueError("cannot assign between arrays of "
+                        "differing shapes")
+            if subarray.strides != value.strides:
+                raise ValueError("cannot assign between arrays of "
+                        "differing strides")
+
+            self._copy(subarray, value)
+
+        else:
+            # Let's assume it's a scalar
+            subarray.fill(value)
+
 # }}}
 
 
diff --git a/test/test_array.py b/test/test_array.py
index 05b6d8c8af13728ede62c0a308df7ae8ebf27142..eb72bf4c901b95706ac106d2675eb960a6b86d9c 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -549,7 +549,9 @@ def test_slice(ctx_factory):
 
     l = 20000
     a_gpu = clrand(queue, (l,), dtype=np.float32)
+    b_gpu = clrand(queue, (l,), dtype=np.float32)
     a = a_gpu.get()
+    b = b_gpu.get()
 
     from random import randrange
     for i in range(20):
@@ -561,6 +563,24 @@ def test_slice(ctx_factory):
 
         assert la.norm(a_gpu_slice.get() - a_slice) == 0
 
+    for i in range(20):
+        start = randrange(l)
+        end = randrange(start, l)
+
+        a_gpu[start:end] = 2*b[start:end]
+        a[start:end] = 2*b[start:end]
+
+        assert la.norm(a_gpu.get() - a) == 0
+
+    for i in range(20):
+        start = randrange(l)
+        end = randrange(start, l)
+
+        a_gpu[start:end] = 2*b_gpu[start:end]
+        a[start:end] = 2*b[start:end]
+
+        assert la.norm(a_gpu.get() - a) == 0
+
 
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the