diff --git a/pyopencl/c_wrapper/wrap_cl_core.h b/pyopencl/c_wrapper/wrap_cl_core.h
index 97b8c2ef0d1d45187469fabf9cb01a2e95f91d05..04c6267aedb372befef912d8048d85e431433d95 100644
--- a/pyopencl/c_wrapper/wrap_cl_core.h
+++ b/pyopencl/c_wrapper/wrap_cl_core.h
@@ -69,6 +69,12 @@ error *create_buffer(clobj_t *buffer, clobj_t context, cl_mem_flags flags,
                      size_t size, void *hostbuf);
 // Memory Object
 error *memory_object__release(clobj_t obj);
+// Memory Map (objects handed out by enqueue_map_buffer)
+error *memory_map__release(clobj_t _map, clobj_t _queue,
+                           const clobj_t *_wait_for, uint32_t num_wait_for,
+                           clobj_t *evt);
+void *memory_map__data(clobj_t _map);   // mapped pointer; NULL once released
+size_t memory_map__size(clobj_t _map);  // mapped size; 0 once released
 // Program
 error *create_program_with_source(clobj_t *program, clobj_t context,
                                   const char *src);
@@ -142,6 +148,10 @@ error *enqueue_write_buffer(clobj_t *event, clobj_t queue, clobj_t mem,
                             size_t device_offset, const clobj_t *wait_for,
                             uint32_t num_wait_for, int is_blocking,
                             void (*ref)(unsigned long));
+error *enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue,
+                          clobj_t _mem, cl_map_flags flags, size_t offset,
+                          size_t size, const clobj_t *_wait_for,
+                          uint32_t num_wait_for, int block); // out: *_evt, *map
 
 error *enqueue_read_image(clobj_t *event, clobj_t queue, clobj_t mem,
                           size_t *origin, size_t *region, void *buffer,
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index 3e29e7afa706fd9a64ea6cdf10177648179778d2..b802c6b7eb488bf92d44cd5afad3a6c6ad2b8a98 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -33,6 +33,7 @@ import sys
 # TODO: can we do without ctypes?
 import ctypes
 from pyopencl._cffi import _ffi, _lib, _get_ref_func, _find_obj, _to_c_callback
+from .compyte.array import f_contiguous_strides, c_contiguous_strides
 
 # {{{ compatibility shims
 
@@ -459,6 +460,15 @@ class MemoryObject(MemoryObjectHolder):
         _handle_error(_lib.memory_object__release(self.ptr))
     # TODO hostbuf?
 
+class MemoryMap(_Common):  # Python handle for a C-level memory_map object
+    def release(self, queue=None, wait_for=None):  # returns the unmap Event
+        c_wait_for, num_wait_for = _clobj_list(wait_for)
+        _event = _ffi.new('clobj_t*')  # out-parameter filled by the C call
+        _handle_error(_lib.memory_map__release(
+            self.ptr, queue.ptr if queue is not None else _ffi.NULL,
+            c_wait_for, num_wait_for, _event))  # NULL queue: C side uses its own
+        return _create_instance(Event, _event[0])
+
 def _c_buffer_from_obj(obj, writable=False):
     """Convert a Python object to a tuple (cdata('void *'), num_bytes, dummy)
     to be able to pass a data stream to a C function. The dummy variable exists
@@ -827,6 +837,44 @@ def _enqueue_write_buffer(queue, mem, hostbuf, device_offset=0,
         _get_ref_func(c_ref)))
     return _create_instance(NannyEvent, ptr_event[0])
 
+# PyPy bug report: https://bitbucket.org/pypy/pypy/issue/1777/unable-to-create-proper-numpy-array-from
+def enqueue_map_buffer(queue, buf, flags, offset, shape, dtype, order="C",
+                       strides=None, wait_for=None, is_blocking=True):
+    """Map *buf* into host memory; return ``(array, event)`` over that map."""
+    dtype = np.dtype(dtype)
+    if not isinstance(shape, tuple):
+        try:
+            shape = tuple(shape)
+        except TypeError:  # non-iterable shape such as a plain int
+            shape = (shape,)
+    byte_size = dtype.itemsize
+    if strides is None:  # derive strides from the requested memory order
+        if order in ("c", "C"):
+            strides = c_contiguous_strides(byte_size, shape)
+        elif order in ("f", "F"):
+            strides = f_contiguous_strides(byte_size, shape)
+        else:
+            raise RuntimeError("unrecognized order specifier %s" % order,
+                               'enqueue_map_buffer')
+    for s in shape:  # bytes to map = itemsize * product(shape)
+        byte_size *= s
+    c_wait_for, num_wait_for = _clobj_list(wait_for)
+    _event = _ffi.new('clobj_t*')
+    _map = _ffi.new('clobj_t*')
+    _handle_error(_lib.enqueue_map_buffer(_event, _map, queue.ptr, buf.ptr,
+                                          flags, offset, byte_size, c_wait_for,
+                                          num_wait_for, bool(is_blocking)))
+    event = _create_instance(Event, _event[0])
+    mem_map = _create_instance(MemoryMap, _map[0])  # don't shadow builtin map
+    # Expose the mapped pointer through numpy's array interface (writable).
+    mem_map.__array_interface__ = {
+        'shape': shape,
+        'typestr': dtype.str,
+        'strides': strides,
+        'data': (int(_lib.clobj__int_ptr(_map[0])), False),
+        'version': 3}
+    return np.asarray(mem_map), event
+
 # }}}
 
 
diff --git a/src/c_wrapper/error.h b/src/c_wrapper/error.h
index 34b8571c8556f7b42b5c7a6d4cef50e5bc0bbb86..f1f3c3ff5e325cb06b7ce1c6a8f0b423d7371f92 100644
--- a/src/c_wrapper/error.h
+++ b/src/c_wrapper/error.h
@@ -4,6 +4,7 @@
 #include <iostream>
 #include <utility>
 #include <functional>
+#include <atomic>
 
 #ifndef __PYOPENCL_ERROR_H
 #define __PYOPENCL_ERROR_H
diff --git a/src/c_wrapper/wrap_cl.cpp b/src/c_wrapper/wrap_cl.cpp
index e015b22d95856c983f51d5fc4fd7354d19731fda..eca8f5991b8452b4fbc5124f0fd25dd7a3e024c8 100644
--- a/src/c_wrapper/wrap_cl.cpp
+++ b/src/c_wrapper/wrap_cl.cpp
@@ -630,6 +630,9 @@ public:
                   cl_command_queue_properties props=0)
         : clobj(create_cl_command_queue(ctx, py_dev, props))
     {}
+    command_queue(const command_queue &queue) // copy: delegates to (handle, bool)
+        : command_queue(queue.data(), true) // NOTE(review): true presumably = retain
+    {}
     ~command_queue()
     {
         pyopencl_call_guarded_cleanup(clReleaseCommandQueue, data());
@@ -901,7 +904,7 @@ public:
 
 class memory_object : public memory_object_holder {
 private:
-    mutable bool m_valid;
+    mutable volatile std::atomic_bool m_valid;
     void *m_hostbuf;
 public:
     memory_object(cl_mem mem, bool retain, void *hostbuf=0)
@@ -914,20 +917,23 @@ public:
             m_hostbuf = hostbuf;
         }
     }
-    void
-    release() const
+    memory_object(const memory_object &mem) // copy shares the same cl_mem
+        : memory_object(mem.data(), true) // retain=true; hostbuf left at default
+    {}
+    ~memory_object() // releases the cl_mem unless release() already did
     {
-        if (!m_valid)
-            throw clerror("MemoryObject.free", CL_INVALID_VALUE,
-                          "trying to double-unref mem object");
+        if (!m_valid.exchange(false)) // flag was already cleared: nothing to do
+            return;
         pyopencl_call_guarded_cleanup(clReleaseMemObject, data());
-        m_valid = false;
     }
-    ~memory_object()
+    void
+    release() const // explicit release; a second call throws clerror
     {
-        if (m_valid) {
-            release();
+        if (!m_valid.exchange(false)) { // test-and-clear in one atomic step
+            throw clerror("MemoryObject.release", CL_INVALID_VALUE,
+                          "trying to double-unref mem object");
         }
+        pyopencl_call_guarded(clReleaseMemObject, data());
     }
     void*
     hostbuf() const
@@ -1536,6 +1542,67 @@ new_buffer(cl_mem mem, void *buff)
 
 // }}}
 
+// {{{ memory_map
+
+class memory_map : public clbase { // one mapped host region of a memory object
+private:
+    mutable volatile std::atomic_bool m_valid; // cleared once unmapped
+    command_queue m_queue; // own copy; used when the caller gives no queue
+    memory_object m_mem; // own copy of the mapped memory object
+    void *m_ptr; // mapped pointer handed to the constructor
+    size_t m_size; // size handed to the constructor
+public:
+    memory_map(const command_queue *queue, const memory_object *mem,
+               void *ptr, size_t size)
+        : m_valid(true), m_queue(*queue), m_mem(*mem), m_ptr(ptr), m_size(size)
+    {}
+    ~memory_map() // unmaps on m_queue if release() was never called
+    {
+        if (!m_valid.exchange(false))
+            return;
+        pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject, m_queue.data(),
+                                      m_mem.data(), m_ptr, 0, NULL, NULL);
+    }
+    event* // event of the enqueued unmap; throws clerror on double release
+    release(const command_queue *queue, const clobj_t *_wait_for,
+            uint32_t num_wait_for) const
+    {
+        // Build the wait list before invalidating: a throw here keeps the map.
+        auto wait_for = buf_from_class<event>(_wait_for, num_wait_for);
+        if (!m_valid.exchange(false))
+            throw clerror("MemoryMap.release", CL_INVALID_VALUE,
+                          "trying to double-unref mem map");
+        queue = queue ? queue : &m_queue; // NULL selects the map's own queue
+        cl_event evt;
+        pyopencl_call_guarded(clEnqueueUnmapMemObject, queue->data(),
+                              m_mem.data(), m_ptr, num_wait_for,
+                              wait_for.get(), &evt);
+        return new_event(evt);
+    }
+    intptr_t
+    intptr() const // integer value of data(), i.e. 0 once released
+    {
+        return reinterpret_cast<intptr_t>(data());
+    }
+    generic_info
+    get_info(cl_uint) const // no info queries are supported; always throws
+    {
+        throw clerror("MemoryMap.get_info", CL_INVALID_VALUE);
+    }
+    void*
+    data() const
+    {
+        return m_valid ? m_ptr : NULL;
+    }
+    size_t
+    size() const
+    {
+        return m_valid ? m_size : 0;
+    }
+};
+
+// }}}
+
 // {{{ sampler
 
 class sampler : public clobj<cl_sampler> {
@@ -2023,6 +2090,31 @@ memory_object__release(clobj_t obj)
         });
 }
 
+
+// Memory Map
+error* // C entry point: unmap _map and hand the unmap event back via *evt
+memory_map__release(clobj_t _map, clobj_t _queue, const clobj_t *_wait_for,
+                    uint32_t num_wait_for, clobj_t *evt)
+{
+    auto map = static_cast<memory_map*>(_map);
+    auto queue = static_cast<command_queue*>(_queue); // may be NULL
+    return c_handle_error([&] {
+            *evt = map->release(queue, _wait_for, num_wait_for);
+        });
+}
+
+void* // mapped pointer of _map, or NULL once the map has been released
+memory_map__data(clobj_t _map)
+{
+    return static_cast<memory_map*>(_map)->data();
+}
+
+size_t // mapped size of _map, or 0 once the map has been released
+memory_map__size(clobj_t _map)
+{
+    return static_cast<memory_map*>(_map)->size();
+}
+
 // Program
 error*
 create_program_with_source(clobj_t *prog, clobj_t _ctx, const char *src)
@@ -2404,6 +2496,38 @@ enqueue_copy_buffer(clobj_t *_evt, clobj_t _queue, clobj_t _src, clobj_t _dst,
         });
 }
 
+error* // map `size` bytes of _mem at `offset`; results go to *_evt and *map
+enqueue_map_buffer(clobj_t *_evt, clobj_t *map, clobj_t _queue, clobj_t _mem,
+                   cl_map_flags flags, size_t offset, size_t size,
+                   const clobj_t *_wait_for, uint32_t num_wait_for,
+                   int block)
+{
+    auto wait_for = buf_from_class<event>(_wait_for, num_wait_for);
+    auto queue = static_cast<command_queue*>(_queue);
+    auto buf = static_cast<buffer*>(_mem);
+    return c_handle_error([&] {
+            cl_event evt; // raw event filled in by clEnqueueMapBuffer
+            void *res = retry_mem_error<void*>([&] {
+                    return pyopencl_call_guarded(
+                        clEnqueueMapBuffer, queue->data(), buf->data(),
+                        cast_bool(block), flags, offset, size, num_wait_for,
+                        wait_for.get(), &evt);
+                });
+            try {
+                *_evt = new event(evt, false);
+                evt = 0; // wrapped above: the catch block must not release it
+                *map = new memory_map(queue, buf, res, size);
+            } catch (...) { // undo the map; NOTE(review): *_evt is not freed
+                if (evt) {
+                    pyopencl_call_guarded_cleanup(clReleaseEvent, evt);
+                }
+                pyopencl_call_guarded_cleanup(clEnqueueUnmapMemObject,
+                                              queue->data(), buf->data(),
+                                              res, 0, NULL, NULL);
+                throw;
+            }
+        });
+}
 
 error*
 enqueue_read_image(clobj_t *_evt, clobj_t _queue, clobj_t _mem, size_t *origin,