diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h
index fb4770c0275967b9651bd106925f80c3406ab965..6aa06a18442172ba8b814621a141f1991dff89d4 100644
--- a/pyopencl/c_wrapper/wrap_cl_core.h
+++ b/pyopencl/c_wrapper/wrap_cl_core.h
@@ -64,8 +64,8 @@ error *platform__get_devices(clobj_t platform, clobj_t **ptr_devices,
 error *platform__unload_compiler(clobj_t plat);
 // Device
 error *device__create_sub_devices(clobj_t _dev, clobj_t **_devs,
-                                  const cl_device_partition_property *props,
-                                  uint32_t *num_devices);
+                                  uint32_t *num_devices,  // paired with _devs; the Python caller reads the count from it
+                                  const cl_device_partition_property *props);  // partition properties; the Python caller passes them 0-terminated
 // Context
 error *create_context(clobj_t *ctx, const cl_context_properties *props,
                       cl_uint num_devices, const clobj_t *ptr_devices);
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index c4172b63cfd0d53f28af562e7d170946a966b222..568e61f1683f36291b72a6779f90a5233ffef178 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -503,7 +503,13 @@ def get_platforms():
 
 class Device(_Common):
     _id = 'device'
-    # TODO create_sub_devices
+    def create_sub_devices(self, props):  # returns a list of objects built by Device._create
+        props = tuple(props) + (0,)  # add a trailing 0 to the caller's property sequence (list terminator)
+        devices = _CArray(_ffi.new('clobj_t**'))  # supplies the .ptr / .size pair handed to the C wrapper
+        _handle_error(_lib.device__create_sub_devices(
+            self.ptr, devices.ptr, devices.size, props))  # order matches the C signature: dev, devs, num_devices, props
+        return [Device._create(devices.ptr[0][i])  # wrap each entry of the array stored at devices.ptr[0]
+                for i in xrange(devices.size[0])]  # devices.size[0] is used as the number of entries
     # TODO create_sub_devices_ext
 
 # }}}
diff --git a/src/c_wrapper/device.cpp b/src/c_wrapper/device.cpp
index b4ba45755c4546e74b922965c28ccb3db878467c..06697a3e59744717c929197c000975b1bfc4acac 100644
--- a/src/c_wrapper/device.cpp
+++ b/src/c_wrapper/device.cpp
@@ -273,8 +273,8 @@ using namespace pyopencl;
 #if PYOPENCL_CL_VERSION >= 0x1020
 error*
 device__create_sub_devices(clobj_t _dev, clobj_t **_devs,
-                           const cl_device_partition_property *props,
-                           uint32_t *num_devices)
+                           uint32_t *num_devices,
+                           const cl_device_partition_property *props)
 {
     auto dev = static_cast<device*>(_dev);
     return c_handle_error([&] {