diff --git a/doc/source/index.rst b/doc/source/index.rst
index 9ec0a735d3e8834e291054525db33fbb9be064a3..387bd7cd0be922e4b07a8548a56cce1abe133db1 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -56,7 +56,7 @@ Here's an example, to give you an impression::
     prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
     a_plus_b = numpy.empty_like(a)
-    cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
+    cl.enqueue_copy(queue, a_plus_b, dest_buf)
 
     print la.norm(a_plus_b - (a+b))
 
diff --git a/doc/source/misc.rst b/doc/source/misc.rst
index 1a2b031307de2a0ce2cb6dedb277f7f951ffa4c7..4fd88e65b1e4b52752dade85c09bdec37eb1b4c6 100644
--- a/doc/source/misc.rst
+++ b/doc/source/misc.rst
@@ -97,6 +97,7 @@ Version 2011.1
 * Add vector types, see :class:`pyopencl.array.vec`.
 * Add :attr:`pyopencl.array.Array.strides`, :attr:`pyopencl.array.Array.flags`.
   Allow the creation of arrys in C and Fortran order.
+* Add :func:`pyopencl.enqueue_copy`. Deprecate all other transfer functions.
 
 Version 0.92
 ------------
diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index aab26ef2108c2cb6ce2060143a67a4a91acae1de..189323fbb0856fc415aaa7f77631a670e4ed789e 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -64,6 +64,9 @@ Platforms, Devices and Contexts
     whose completion this command waits before starting exeuction.
 .. |std-enqueue-blurb| replace:: Returns a new :class:`Event`. |explain-waitfor|
 
+.. |copy-depr| replace:: **Note:** This function is deprecated as of PyOpenCL 2011.1.
+        Use :func:`enqueue_copy` instead.
+
 .. function:: get_platforms()
 
     Return a list of :class:`Platform` instances.
@@ -300,6 +303,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
@@ -309,6 +314,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
@@ -319,6 +326,8 @@ Buffers
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     .. versionadded:: 0.91.5
 
 .. function:: enqueue_read_buffer_rect(queue, mem, hostbuf, buffer_origin, host_origin, region, buffer_pitches=None, host_pitches=None, wait_for=None, is_blocking=True)
@@ -331,6 +340,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -348,6 +359,8 @@ Buffers
 
     *hostbuf* |buf-iface|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -363,6 +376,8 @@ Buffers
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     Only available in OpenCL 1.1 and newer.
 
     .. versionadded:: 0.92
@@ -458,6 +473,8 @@ Images
 
     |std-enqueue-blurb|
 
+    |copy-depr|
+
     .. versionchanged:: 0.91
         *pitch* arguments defaults to zero, moved.
 
@@ -478,16 +495,90 @@ Images
 
 .. function:: enqueue_copy_image(queue, src, dest, src_origin, dest_origin, region, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
 .. function:: enqueue_copy_image_to_buffer(queue, src, dest, origin, region, offset, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
 .. function:: enqueue_copy_buffer_to_image(queue, src, dest, offset, origin, region, wait_for=None)
 
+    |copy-depr|
+
     |std-enqueue-blurb|
 
+Transfers
+^^^^^^^^^
+
+.. function:: enqueue_copy(queue, dest, src, **kwargs)
+
+    Copy from :class:`Image`, :class:`Buffer` or the host to 
+    :class:`Image`, :class:`Buffer` or the host. (Note: host-to-host
+    copies are unsupported.)
+
+    The following keyword arguments are available:
+
+    :arg wait_for: (optional, default empty)
+    :arg is_blocking: Wait for completion. Defaults to *True*. 
+      (Available on any copy involving host memory)
+
+    :class:`Buffer` ↔ host transfers:
+
+    :arg device_offset: offset in bytes (optional)
+
+    :class:`Buffer` ↔ :class:`Buffer` transfers:
+
+    :arg byte_count: (optional)
+    :arg src_offset: (optional)
+    :arg dest_offset: (optional)
+
+    Rectangular :class:`Buffer` ↔ host transfers (CL 1.1 and newer):
+
+    :arg buffer_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg host_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg buffer_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+    :arg host_pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional, "tightly-packed" if unspecified)
+
+    :class:`Image` ↔ host transfers:
+
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg pitches: :class:`tuple` of :class:`int` of length
+        two or shorter. (optional)
+
+    :class:`Buffer` ↔ :class:`Image` transfers:
+
+    :arg offset: offset in buffer (mandatory)
+    :arg origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    :class:`Image` ↔ :class:`Image` transfers:
+
+    :arg src_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg dest_origin: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+    :arg region: :class:`tuple` of :class:`int` of length
+        three or shorter. (mandatory)
+
+    |std-enqueue-blurb|
+
+    .. versionadded:: 2011.1
+
 Mapping Memory into Host Address Space
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/examples/demo.py b/examples/demo.py
index 98fb46f8b49e6ebca02c9481e186127045e25b9a..ba948d6716b84c338f3a28b64d0b3e6c9425a1bc 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -25,6 +25,6 @@ prg = cl.Program(ctx, """
 prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
 
 a_plus_b = numpy.empty_like(a)
-cl.enqueue_read_buffer(queue, dest_buf, a_plus_b).wait()
+cl.enqueue_copy(queue, a_plus_b, dest_buf)
 
 print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b))
diff --git a/examples/demo_elementwise_complex.py b/examples/demo_elementwise_complex.py
index c845c35badf7f7d2a682b355cdce7f2cfd63c736..7845530b13c152db9e35479ba7ea3bcbcd56064d 100644
--- a/examples/demo_elementwise_complex.py
+++ b/examples/demo_elementwise_complex.py
@@ -7,10 +7,10 @@ ctx = cl.create_some_context()
 queue = cl.CommandQueue(ctx)
 
 n = 10
-a_gpu = cl_array.to_device(ctx, queue,
+a_gpu = cl_array.to_device(queue,
         ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
             ).astype(numpy.complex64))
-b_gpu = cl_array.to_device(ctx, queue,
+b_gpu = cl_array.to_device(queue,
         ( numpy.random.randn(n) + 1j*numpy.random.randn(n)
             ).astype(numpy.complex64))
 
@@ -46,7 +46,7 @@ real_part = ElementwiseKernel(ctx,
 c_gpu = cl_array.empty_like(a_gpu)
 complex_prod(5, a_gpu, b_gpu, c_gpu)
 
-c_gpu_real = cl_array.empty(ctx, len(a_gpu), dtype=numpy.float32, queue=queue)
+c_gpu_real = cl_array.empty(queue, len(a_gpu), dtype=numpy.float32)
 real_part(c_gpu, c_gpu_real)
 print c_gpu.get().real - c_gpu_real.get()
 
diff --git a/examples/matrix-multiply.py b/examples/matrix-multiply.py
index 91560ffbf6fb9f39f01e83d32df587a60aafbf45..9de9cf1ea7b419700911584bab9c64961231f338 100644
--- a/examples/matrix-multiply.py
+++ b/examples/matrix-multiply.py
@@ -207,7 +207,7 @@ gpu_time = (time()-t1)/count
 
 # transfer device -> host -----------------------------------------------------
 t1 = time()
-cl.enqueue_read_buffer(queue, d_c_buf, h_c).wait()
+cl.enqueue_copy(queue, h_c, d_c_buf)
 pull_time = time()-t1
 
 # timing output ---------------------------------------------------------------
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index feab73477196958a5e7a9f63d3c19590a693f4a0..8154caf5b901b72b27066b443a1656ae54808831 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -7,7 +7,7 @@ except ImportError:
     from os.path import dirname, join, realpath
     if realpath(join(os.getcwd(), "pyopencl")) == realpath(dirname(__file__)):
         from warnings import warn
-        warn ("It looks like you are importing PyOpenCL from "
+        warn("It looks like you are importing PyOpenCL from "
                 "its source directory. This likely won't work.")
     raise
 
@@ -16,6 +16,7 @@ except ImportError:
 import numpy as np
 from pyopencl._cl import *
 import inspect as _inspect
+from decorator import decorator as _decorator
 
 CONSTANT_CLASSES = [
         getattr(_cl, name) for name in dir(_cl)
@@ -377,6 +378,89 @@ def create_some_context(interactive=True):
 
     return Context(devices)
 
+
+
+
+def _mark_copy_deprecated(func):
+    def new_func(*args, **kwargs):
+        from warnings import warn
+        warn("'%s' has been deprecated in version 2011.1. Please use "
+                "enqueue_copy() instead." % func.__name__[1:], DeprecationWarning,
+                stacklevel=2)
+        return func(*args, **kwargs)
+
+    try:
+        from functools import update_wrapper
+    except ImportError:
+        pass
+    else:
+        update_wrapper(new_func, func)
+
+    return new_func
+
+
+enqueue_read_image = _mark_copy_deprecated(_cl._enqueue_read_image)
+enqueue_write_image = _mark_copy_deprecated(_cl._enqueue_write_image)
+enqueue_copy_image = _mark_copy_deprecated(_cl._enqueue_copy_image)
+enqueue_copy_image_to_buffer = _mark_copy_deprecated(_cl._enqueue_copy_image_to_buffer)
+enqueue_copy_buffer_to_image = _mark_copy_deprecated(_cl._enqueue_copy_buffer_to_image)
+enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer)
+enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer)
+enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer)
+
+if _cl.get_cl_header_version() >= (1,1):
+    enqueue_read_buffer_rect = _mark_copy_deprecated(_cl._enqueue_read_buffer_rect)
+    enqueue_write_buffer_rect = _mark_copy_deprecated(_cl._enqueue_write_buffer_rect)
+    enqueue_copy_buffer_rect = _mark_copy_deprecated(_cl._enqueue_copy_buffer_rect)
+
+def enqueue_copy(queue, dest, src, **kwargs):
+    if isinstance(dest, Buffer):
+        if isinstance(src, Buffer):
+            if "src_origin" in kwargs:
+                return _cl._enqueue_copy_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                kwargs["dst_offset"] = kwargs.pop("dest_offset")
+                return _cl._enqueue_copy_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image_to_buffer(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_write_buffer_rect(queue, dest, src, **kwargs)
+            else:
+                return _cl._enqueue_write_buffer(queue, dest, src, **kwargs)
+
+    elif isinstance(dest, Image):
+        if isinstance(src, Buffer):
+            return _cl._enqueue_copy_buffer_to_image(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            return _cl._enqueue_copy_image(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            origin = kwargs.pop("origin")
+            region = kwargs.pop("region")
+            return _cl._enqueue_write_image(queue, dest, origin, region, src, **kwargs)
+
+    else:
+        # assume to-host
+
+        if isinstance(src, Buffer):
+            if "buffer_origin" in kwargs:
+                return _cl._enqueue_read_buffer_rect(queue, src, dest, **kwargs)
+            else:
+                return _cl._enqueue_read_buffer(queue, src, dest, **kwargs)
+        elif isinstance(src, Image):
+            pitches = kwargs.pop("pitches", (0,0))
+            if len(pitches) == 1:
+                kwargs["row_pitch"], = pitches
+            else:
+                kwargs["row_pitch"], kwargs["slice_pitch"] = pitches
+
+            return _cl._enqueue_read_image(queue, src, dest, **kwargs)
+        else:
+            # assume from-host
+            raise TypeError("enqueue_copy cannot perform host-to-host transfers")
+
 # }}}
 
 
diff --git a/pyopencl/array.py b/pyopencl/array.py
index f78cab4bb8db7a993311cf79c48d438e72fc383d..38eabdb31d129f6f681cbdd3b4c9477d85f8ac25 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -349,7 +349,7 @@ class Array(object):
             ary = ary.copy()
 
         if self.size:
-            cl.enqueue_write_buffer(queue or self.queue, self.data, ary, 
+            cl.enqueue_copy(queue or self.queue, self.data, ary,
                     is_blocking=not async)
 
     def get(self, queue=None, ary=None, async=False):
@@ -367,7 +367,7 @@ class Array(object):
         assert self.flags.forc, "Array in get() must be contiguous"
 
         if self.size:
-            cl.enqueue_read_buffer(queue or self.queue, self.data, ary,
+            cl.enqueue_copy(queue or self.queue, ary, self.data,
                     is_blocking=not async)
 
         return ary
diff --git a/src/wrapper/wrap_cl_part_1.cpp b/src/wrapper/wrap_cl_part_1.cpp
index 58fda5d8cf0decdbd965322097d275837cd6675f..f7a7dddbf9a4d0f0191bca9f3f6dc9d6d7011d1c 100644
--- a/src/wrapper/wrap_cl_part_1.cpp
+++ b/src/wrapper/wrap_cl_part_1.cpp
@@ -164,7 +164,7 @@ void pyopencl_expose_part_1()
   // {{{ transfers
 
   // {{{ byte-for-byte
-  py::def("enqueue_read_buffer", enqueue_read_buffer,
+  py::def("_enqueue_read_buffer", enqueue_read_buffer,
       (py::args("queue", "mem", "hostbuf"),
        py::arg("device_offset")=0,
        py::arg("wait_for")=py::object(),
@@ -172,7 +172,7 @@ void pyopencl_expose_part_1()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_buffer", enqueue_write_buffer,
+  py::def("_enqueue_write_buffer", enqueue_write_buffer,
       (py::args("queue", "mem", "hostbuf"),
        py::arg("device_offset")=0,
        py::arg("wait_for")=py::object(),
@@ -180,7 +180,7 @@ void pyopencl_expose_part_1()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer", enqueue_copy_buffer,
+  py::def("_enqueue_copy_buffer", enqueue_copy_buffer,
       (py::args("queue", "src", "dst"),
        py::arg("byte_count")=0,
        py::arg("src_offset")=0,
@@ -194,7 +194,7 @@ void pyopencl_expose_part_1()
   // {{{ rectangular
 
 #ifdef CL_VERSION_1_1
-  py::def("enqueue_read_buffer_rect", enqueue_read_buffer_rect,
+  py::def("_enqueue_read_buffer_rect", enqueue_read_buffer_rect,
       (py::args("queue", "mem", "hostbuf",
                 "buffer_origin", "host_origin", "region"),
        py::arg("buffer_pitches")=py::object(),
@@ -203,7 +203,7 @@ void pyopencl_expose_part_1()
        py::arg("is_blocking")=true
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_buffer_rect", enqueue_write_buffer_rect,
+  py::def("_enqueue_write_buffer_rect", enqueue_write_buffer_rect,
       (py::args("queue", "mem", "hostbuf",
                 "buffer_origin", "host_origin", "region"),
        py::arg("buffer_pitches")=py::object(),
@@ -212,7 +212,7 @@ void pyopencl_expose_part_1()
        py::arg("is_blocking")=true
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer_rect", enqueue_copy_buffer_rect,
+  py::def("_enqueue_copy_buffer_rect", enqueue_copy_buffer_rect,
       (py::args("queue", "src", "dst",
                 "src_origin", "dst_origin", "region"),
        py::arg("src_pitches")=py::object(),
diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp
index 899db5b893af39e7f7ea3779e4934dd366b48dca..6e8807fd9adde53ceea30b9a1f97be6971925520 100644
--- a/src/wrapper/wrap_cl_part_2.cpp
+++ b/src/wrapper/wrap_cl_part_2.cpp
@@ -41,7 +41,7 @@ void pyopencl_expose_part_2()
 
   DEF_SIMPLE_FUNCTION(get_supported_image_formats);
 
-  py::def("enqueue_read_image", enqueue_read_image,
+  py::def("_enqueue_read_image", enqueue_read_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -50,7 +50,7 @@ void pyopencl_expose_part_2()
        py::arg("host_buffer")=py::object()
        ),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_write_image", enqueue_write_image,
+  py::def("_enqueue_write_image", enqueue_write_image,
       (py::args("queue", "mem", "origin", "region", "hostbuf"),
        py::arg("row_pitch")=0,
        py::arg("slice_pitch")=0,
@@ -60,15 +60,15 @@ void pyopencl_expose_part_2()
        ),
       py::return_value_policy<py::manage_new_object>());
 
-  py::def("enqueue_copy_image", enqueue_copy_image,
+  py::def("_enqueue_copy_image", enqueue_copy_image,
       (py::args("queue", "src", "dest", "src_origin", "dest_origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_image_to_buffer", enqueue_copy_image_to_buffer,
       (py::args("queue", "src", "dest", "origin", "region", "offset"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());
-  py::def("enqueue_copy_buffer_to_image", enqueue_copy_image_to_buffer,
+  py::def("_enqueue_copy_buffer_to_image", enqueue_copy_buffer_to_image,
       (py::args("queue", "src", "dest", "offset", "origin", "region"),
        py::arg("wait_for")=py::object()),
       py::return_value_policy<py::manage_new_object>());