diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h
index a586a33dc8f89eff392434b3fc2c00a76417fefb..9f1d0d6d083a737174c6bdb7cc4082d96ccf95c5 100644
--- a/pyopencl/c_wrapper/wrap_cl_core.h
+++ b/pyopencl/c_wrapper/wrap_cl_core.h
@@ -69,7 +69,7 @@ error *image__get_image_info(void *ptr_image, cl_image_info param, generic_info
 
 long _hash(void *ptr_platform, class_t);
 
-error *_enqueue_nd_range_kernel(void **ptr_event, void *ptr_command_queue, void *ptr_kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size);
+error *_enqueue_nd_range_kernel(void **ptr_event, void *ptr_command_queue, void *ptr_kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, void **wait_for, uint32_t num_wait_for);
 
 error *_enqueue_marker_with_wait_list(void **ptr_event, void *ptr_command_queue,
                                       void **wait_for, uint32_t num_wait_for);
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 58a2e014b5c7d4d0395fede3289d6a1960d7db28..a58cbf1441da52a90a8ef0cecfc9c9079a9dcc05 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -660,10 +660,6 @@ def enqueue_nd_range_kernel(queue, kernel,
         global_work_size, local_work_size, global_work_offset=None,
         wait_for=None, g_times_l=False):
 
-    if wait_for is not None:
-        # TODO: implement wait_for
-        raise NotImplementedError("wait_for")
-
     work_dim = len(global_work_size)
 
     if local_work_size is not None:
@@ -695,6 +691,7 @@ def enqueue_nd_range_kernel(queue, kernel,
         c_local_work_size = _ffi.new('const size_t[]', local_work_size)
 
     ptr_event = _ffi.new('void **')
+    c_wait_for, num_wait_for = _c_obj_list(wait_for)
     _handle_error(_lib._enqueue_nd_range_kernel(
         ptr_event,
         queue.ptr,
@@ -702,7 +699,8 @@ def enqueue_nd_range_kernel(queue, kernel,
         work_dim,
         c_global_work_offset,
         c_global_work_size,
-        c_local_work_size
+        c_local_work_size,
+        c_wait_for, num_wait_for
     ))
     return _create_instance(Event, ptr_event[0])
 
diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp
index 18d2c0cb727d2800a59bb7d67cfc527942dc8aa6..1d60f586530aa883a8d5180292b5c2821efdc0ba 100644
--- a/src/c_wrapper/wrap_cl.cpp
+++ b/src/c_wrapper/wrap_cl.cpp
@@ -2811,13 +2811,10 @@ namespace pyopencl
       cl_uint work_dim,
       const size_t *global_work_offset,
       const size_t *global_work_size,
-      const size_t *local_work_size //,
-      //py::object py_global_work_offset,
-      //py::object py_wait_for,
-      )
+      const size_t *local_work_size,
+      void **wait_for, uint32_t num_wait_for)
   {
-    // TODO
-    // PYOPENCL_PARSE_WAIT_FOR;
+    PYOPENCL_PARSE_WAIT_FOR;
 
     cl_event evt;
 
@@ -2829,7 +2826,7 @@ namespace pyopencl
                            global_work_offset,
                            global_work_size,
                            local_work_size,
-                           0, NULL,// PYOPENCL_WAITLIST_ARGS,
+                           PYOPENCL_WAITLIST_ARGS,
                            &evt
                            ));
     PYOPENCL_RETURN_NEW_EVENT(evt);
@@ -3262,7 +3259,8 @@ error *_create_image_3d(
 ::error *_enqueue_nd_range_kernel(
     void **ptr_event, void *ptr_command_queue, void *ptr_kernel,
     cl_uint work_dim, const size_t *global_work_offset,
-    const size_t *global_work_size, const size_t *local_work_size)
+    const size_t *global_work_size, const size_t *local_work_size,
+    void **wait_for, uint32_t num_wait_for)
 {
   BEGIN_C_HANDLE_ERROR
 
@@ -3272,7 +3270,8 @@ error *_create_image_3d(
       work_dim,
       global_work_offset,
       global_work_size,
-      local_work_size);
+      local_work_size,
+      wait_for, num_wait_for);
 
   END_C_HANDLE_ERROR