diff --git a/pyopencl/array.py b/pyopencl/array.py
index 33fa2fc02cdba563c306253526274f2f869a914d..c6d19cc06b50d5724784714ca85cab0c9557dad5 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -1181,8 +1181,8 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None):
 
     def make_func_for_chunk_size(chunk_size):
         knl = elementwise.get_put_kernel(
+                context,
                 a_dtype, dest_indices.dtype, vec_count=chunk_size)
-        knl.set_block_shape(*dest_indices._block)
         return knl
 
     knl = make_func_for_chunk_size(chunk_size)
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 60c4bc1108c261dffd05210a59f6f159b6c85f7c..77b9626b5b5f1d7eb827646ce45acc2ea93ac9e0 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -380,7 +380,7 @@ def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
             + "\n".join("dest%d[dest_idx] = src%d[i];" % (i, i)
                 for i in range(vec_count)))
 
-    return get_elwise_kernel(args, body, name="put")
+    return get_elwise_kernel(context, args, body, name="put")
 
 
 @context_dependent_memoize