diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index ceab9b2b803f15d91777a8f138a5101a85e7e3a0..601ec6f969c7fff0f6f5c2c68952f48c7563ed90 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -862,8 +862,8 @@ def _mark_copy_deprecated(func):
 # enqueue_copy_buffer_to_image = _mark_copy_deprecated(
 #         _cl._enqueue_copy_buffer_to_image)
 enqueue_read_buffer = _mark_copy_deprecated(_cl._enqueue_read_buffer)
-# enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer)
-# enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer)
+enqueue_write_buffer = _mark_copy_deprecated(_cl._enqueue_write_buffer)
+enqueue_copy_buffer = _mark_copy_deprecated(_cl._enqueue_copy_buffer)
 
 
 # if _cl.get_cl_header_version() >= (1, 1):
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 42a478e2a0135ea2fa68bff3043fb34e5db38068..f7ac7dccdade7dc84e3cc65bdc561274fc1e6738 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -282,7 +282,7 @@ class Buffer(MemoryObjectHolder):
     @classmethod
     def _c_buffer_from_obj(cls, obj):
         # assume numpy array for now
-        return _ffi.cast('void *', obj.__array_interface__['data'][0])
+        return _ffi.cast('void *', obj.__array_interface__['data'][0]), obj.nbytes  # (data address cast to void *, obj.nbytes)
         
     def __init__(self, context, flags, size=0, hostbuf=None):
         if hostbuf is not None and not (flags & (mem_flags.USE_HOST_PTR | mem_flags.COPY_HOST_PTR)):
@@ -290,8 +290,7 @@ class Buffer(MemoryObjectHolder):
         c_hostbuf = _ffi.NULL
         if hostbuf is not None:
             # todo: buffer protocol; for now hostbuf is assumed to be a numpy array
-            c_hostbuf = self._c_buffer_from_obj(hostbuf)
-            hostbuf_size = hostbuf.nbytes
+            c_hostbuf, hostbuf_size = self._c_buffer_from_obj(hostbuf)
             if size > hostbuf_size:
                 raise RuntimeError("Buffer", status_code.INVALID_VALUE, "specified size is greater than host buffer size")
             if size == 0:
@@ -494,13 +493,39 @@ def enqueue_nd_range_kernel(queue, kernel, global_work_size, local_work_size, gl
     ))
     return _create_instance(Event, ptr_event[0])
 
-def _enqueue_read_buffer(cq, mem, buf, device_offset=0, is_blocking=True):
-    c_buf = Buffer._c_buffer_from_obj(buf)
-    size = buf.nbytes
+def _enqueue_read_buffer(queue, mem, buf, device_offset=0, wait_for=None, is_blocking=True):
+    """Call ``_lib._enqueue_read_buffer`` for *mem* and host array *buf*; return the Event (*wait_for* is not forwarded)."""
+    c_buf, size = Buffer._c_buffer_from_obj(buf)
     ptr_event = _ffi.new('void **')
     _handle_error(_lib._enqueue_read_buffer(
         ptr_event,
-        cq.ptr,
+        queue.ptr,
+        mem.ptr,
+        c_buf, size,  # host pointer and buf.nbytes, as returned by _c_buffer_from_obj
+        device_offset,
+        bool(is_blocking)
+    ))
+    return _create_instance(Event, ptr_event[0])
+
+def _enqueue_copy_buffer(queue, src, dst, byte_count=-1, src_offset=0, dst_offset=0, wait_for=None):
+    """Enqueue a copy from *src* to *dst* through the C wrapper and return its Event.
+
+    Counts and offsets are passed through unchanged; *wait_for* is not forwarded.
+    """
+    event_out = _ffi.new('void **')
+    status = _lib._enqueue_copy_buffer(
+        event_out, queue.ptr, src.ptr, dst.ptr,
+        byte_count, src_offset, dst_offset,
+    )
+    _handle_error(status)
+    return _create_instance(Event, event_out[0])
+
+def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0, wait_for=None, is_blocking=True):
+    """Enqueue a write of host array *hostbuf* into *mem*; return the Event (*wait_for* is not forwarded)."""
+    c_buf, size = Buffer._c_buffer_from_obj(hostbuf)
+    ptr_event = _ffi.new('void **')
+    _handle_error(_lib._enqueue_write_buffer(
+        ptr_event, queue.ptr,
         mem.ptr,
         c_buf,
         size,
@@ -509,6 +534,7 @@ def _enqueue_read_buffer(cq, mem, buf, device_offset=0, is_blocking=True):
     ))
     return _create_instance(Event, ptr_event[0])
 
+    
 def _create_instance(cls, ptr):
     ins = cls.__new__(cls)
     ins.ptr = ptr
diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp
index a145faef1f88b2ec10986899821efbf94c444c43..27e9c2682392246f209d2d916fe81f52cc4bc124 100644
--- a/src/c_wrapper/wrap_cl.cpp
+++ b/src/c_wrapper/wrap_cl.cpp
@@ -1864,10 +1864,40 @@ generic_info get_info(cl_device_info param_name) const
 						&evt
 						))
       // );
-
       PYOPENCL_RETURN_NEW_EVENT(evt);
   }
 
+  // Enqueue clEnqueueWriteBuffer: `size` bytes from host `buffer` into `mem` at `device_offset`; returns the new event.
+  inline event *enqueue_write_buffer(
+      command_queue &cq,
+      memory_object_holder &mem,
+      void *buffer,
+      size_t size,
+      size_t device_offset,
+      /*py::object py_wait_for,*/
+      bool is_blocking)
+  {
+    //PYOPENCL_PARSE_WAIT_FOR;
+
+    cl_event evt;
+    // TODO: restore the PYOPENCL_RETRY_IF_MEM_ERROR wrapper that is commented out around this call.
+    //PYOPENCL_RETRY_IF_MEM_ERROR(
+    PYOPENCL_CALL_GUARDED_THREADED(clEnqueueWriteBuffer,
+				   (cq.data(),
+				    mem.data(),
+				    PYOPENCL_CAST_BOOL(is_blocking),
+				    device_offset, size, buffer,
+				    0, NULL, // no wait list is passed yet (was PYOPENCL_WAITLIST_ARGS)
+				    &evt
+				    ));
+    //);
+  // TODO: switch to the NANNY variant below (presumably it ties `buffer` to the event's lifetime -- confirm).
+  PYOPENCL_RETURN_NEW_EVENT(evt);
+  //PYOPENCL_RETURN_NEW_NANNY_EVENT(evt, buffer);
+  }
+
+
+  
   // }}}
 
   inline event *enqueue_nd_range_kernel(
@@ -2134,6 +2164,25 @@ generic_info get_info(cl_device_info param_name) const
 		   )
       return 0;
   }
+
+  // Flat wrapper (declared in wrap_cl_core.h): casts the opaque handles and forwards to enqueue_write_buffer.
+  ::error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking) {
+    C_HANDLE_ERROR(
+      *ptr_event = enqueue_write_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue),
+        *static_cast<pyopencl::memory_object_holder*>(ptr_memory_object_holder), buffer, size, device_offset, is_blocking != 0);
+    )
+    return 0;
+  }
+
+  // Flat wrapper (declared in wrap_cl_core.h): casts the opaque handles and forwards to enqueue_copy_buffer.
+  ::error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset) {
+    C_HANDLE_ERROR(
+      *ptr_event = enqueue_copy_buffer(*static_cast<pyopencl::command_queue*>(ptr_command_queue),
+        *static_cast<pyopencl::memory_object_holder*>(ptr_src),
+        *static_cast<pyopencl::memory_object_holder*>(ptr_dst), byte_count, src_offset, dst_offset);
+    )
+    return 0;
+  }
   
   intptr_t _int_ptr(void* ptr, class_t class_) {
 #define INT_PTR(CLSU, CLS) return (intptr_t)(static_cast<pyopencl::CLS*>(ptr)->data());
diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h
index a93852e1d62acba4d954fb822dd1f89465aea607..c2f777026366bbf4eebc74d85f43b169bc968edf 100644
--- a/src/c_wrapper/wrap_cl_core.h
+++ b/src/c_wrapper/wrap_cl_core.h
@@ -45,6 +45,8 @@ long _hash(void *ptr_platform, class_t);
 
 error *_enqueue_nd_range_kernel(void **ptr_event, void *ptr_command_queue, void *ptr_kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size);
 error *_enqueue_read_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking);
+error *_enqueue_copy_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_src, void *ptr_dst, ptrdiff_t byte_count, size_t src_offset, size_t dst_offset);
+error *_enqueue_write_buffer(void **ptr_event, void *ptr_command_queue, void *ptr_memory_object_holder, void *buffer, size_t size, size_t device_offset, int is_blocking);
 void populate_constants(void(*add)(const char*, const char*, long value));
 
 intptr_t _int_ptr(void*, class_t);