diff --git a/doc/source/runtime.rst b/doc/source/runtime.rst
index 4dd2c14f9ebaa8e54a6e4709d8deada6534306b7..547ce01876393cc7650b8fc4520e01fa331aa2cc 100644
--- a/doc/source/runtime.rst
+++ b/doc/source/runtime.rst
@@ -601,7 +601,7 @@ Mapping Memory into Host Address Space
 
     .. method:: release(queue=None, wait_for=None)
 
-.. function:: enqueue_map_buffer(queue, buf, flags, offset, shape, dtype, order, wait_for=None, is_blocking=True)
+.. function:: enqueue_map_buffer(queue, buf, flags, offset, shape, dtype, order="C", wait_for=None, is_blocking=True)
 
     |explain-waitfor|
     *shape*, *dtype*, and *order* have the same meaning
@@ -616,7 +616,10 @@ Mapping Memory into Host Address Space
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
-.. function:: enqueue_map_image(queue, buf, flags, origin, region, shape, dtype, order, wait_for=None, is_blocking=True)
+    .. versionchanged:: 2012.2
+        *order* now defaults to "C".
+
+.. function:: enqueue_map_image(queue, img, flags, origin, region, shape, dtype, order="C", wait_for=None, is_blocking=True)
 
     |explain-waitfor|
     *shape*, *dtype*, and *order* have the same meaning
@@ -631,6 +634,8 @@ Mapping Memory into Host Address Space
     .. versionchanged:: 2011.1
         *is_blocking* now defaults to True.
 
+    .. versionchanged:: 2012.2
+        *order* now defaults to "C".
 
 Samplers
 ^^^^^^^^
@@ -830,6 +835,19 @@ Programs and Kernels
         *global_size* and *local_size* also do not have to have the same number
         of dimensions.
 
+        .. note::
+
+            :meth:`__call__` is *not* thread-safe. It sets the arguments using :meth:`set_args`
+            and then runs :func:`enqueue_nd_range_kernel`. Another thread using the same
+            kernel object could interleave with these two steps, with undefined outcome.
+            This issue is inherited from the C-level OpenCL API. The recommended solution
+            is to make a separate kernel (i.e. access ``prg.kernel_name``, which corresponds
+            to making a new kernel) for every thread that may enqueue calls to the kernel.
+
+            A solution involving implicit locks was discussed and decided against on the
+            mailing list in `October 2012
+            <http://lists.tiker.net/pipermail/pyopencl/2012-October/001311.html>`_.
+
         .. versionchanged:: 0.92
             *local_size* was promoted to third positional argument from being a
             keyword argument. The old keyword argument usage will continue to
diff --git a/src/wrapper/wrap_cl_part_2.cpp b/src/wrapper/wrap_cl_part_2.cpp
index cc7942c8b6af64cdb5aaf9fe44419c46f3185ae1..c48ee7aef39c50bff349caa918fab64d90617d1c 100644
--- a/src/wrapper/wrap_cl_part_2.cpp
+++ b/src/wrapper/wrap_cl_part_2.cpp
@@ -151,13 +151,15 @@ void pyopencl_expose_part_2()
   py::def("enqueue_map_buffer", enqueue_map_buffer,
       (py::args("queue", "buf", "flags",
                 "offset",
-                "shape", "dtype", "order"),
+                "shape", "dtype"),
+       py::arg("order")="C",
        py::arg("wait_for")=py::object(),
        py::arg("is_blocking")=true));
   py::def("enqueue_map_image", enqueue_map_image,
       (py::args("queue", "img", "flags",
                 "origin", "region",
-                "shape", "dtype", "order"),
+                "shape", "dtype"),
+       py::arg("order")="C",
        py::arg("wait_for")=py::object(),
        py::arg("is_blocking")=true));