diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst
index 3f85a6caf6f1a9e7a507614f0fb9d338be5703d3..3816abaa7785016179a45c24afb065f7c1fbac12 100644
--- a/doc/runtime_memory.rst
+++ b/doc/runtime_memory.rst
@@ -33,7 +33,7 @@ OpenCL Runtime: Memory
     |comparable|
 
 Memory Migration
-^^^^^^^^^^^^^^^^
+----------------
 
 .. function:: enqueue_migrate_mem_objects(queue, mem_objects, flags=0, wait_for=None)
 
@@ -52,8 +52,8 @@ Memory Migration
     Only available with the `cl_ext_migrate_memobject`
     extension.
 
-Buffers
-^^^^^^^
+Buffer
+------
 
 .. class:: Buffer(context, flags, size=0, hostbuf=None)
 
@@ -107,24 +107,39 @@ Buffers
 
 .. _svm:
 
-Shared Virtual Memory
-^^^^^^^^^^^^^^^^^^^^^
+Shared Virtual Memory (SVM)
+---------------------------
 
-Shared virtual memory requires OpenCL 2.0.
+Shared virtual memory allows the host and the compute device to share
+address space, so that pointers on the host and on the device may have
+the same meaning. In addition, it allows the same memory to be accessed
+by both the host and the device. *Coarse-grain* SVM requires that
+buffers be mapped before being accessed on the host, whereas *fine-grain*
+SVM does away with that requirement.
 
-.. autoclass:: SVMAllocation
+SVM requires OpenCL 2.0.
 
 .. autoclass:: SVM
 
 .. autoclass:: SVMMap
 
+Allocating SVM
+^^^^^^^^^^^^^^
+
+.. autofunction:: svm_empty
+.. autofunction:: svm_empty_like
 .. autofunction:: csvm_empty
-.. autofunction:: fsvm_empty
 .. autofunction:: csvm_empty_like
+.. autofunction:: fsvm_empty
 .. autofunction:: fsvm_empty_like
 
-Image Formats
-^^^^^^^^^^^^^
+SVM Allocation Holder
+^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: SVMAllocation
+
+Image
+-----
 
 .. class:: ImageFormat([channel_order, channel_type])
 
@@ -169,8 +184,6 @@ Image Formats
     See :class:`mem_flags` for possible values of *flags*
     and :class:`mem_object_type` for possible values of *image_type*.
 
-Images
-^^^^^^
 .. class:: Image(context, flags, format, shape=None, pitches=None, hostbuf=None, is_array=False, buffer=None):
 
     See :class:`mem_flags` for values of *flags*.
@@ -255,12 +268,12 @@ Images
     .. versionadded:: 2011.2
 
 Transfers
-^^^^^^^^^
+---------
 
 .. autofunction:: enqueue_copy(queue, dest, src, **kwargs)
 
 Mapping Memory into Host Address Space
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+--------------------------------------
 
 .. autoclass:: MemoryMap
 
@@ -309,7 +322,7 @@ Mapping Memory into Host Address Space
         Added *strides* argument.
 
 Samplers
-^^^^^^^^
+--------
 
 .. class:: Sampler(context, normalized_coords, addressing_mode, filter_mode)
 
diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py
index 9100c844cfdda55105c35043bf72cb163dcde4a2..166601f27e038e641f0c35ac44c25b78e4b4d992 100644
--- a/pyopencl/__init__.py
+++ b/pyopencl/__init__.py
@@ -112,6 +112,10 @@ from pyopencl.cffi_cl import (  # noqa
         MemoryObject,
         MemoryMap,
         Buffer,
+        SVMAllocation,
+        SVM,
+        SVMMap,
+
         CompilerWarning,
         _Program,
         Kernel,
@@ -695,7 +699,8 @@ def enqueue_copy(queue, dest, src, **kwargs):
         Two types of 'buffer' occur in the arguments to this function,
         :class:`Buffer` and 'host-side buffers'. The latter are
         defined by Python and commonly called `buffer objects
-        <https://docs.python.org/3.4/c-api/buffer.html>`_.
+        <https://docs.python.org/3.4/c-api/buffer.html>`_. :mod:`numpy`
+        arrays are a very common example.
         Make sure to always be clear on whether a :class:`Buffer` or a
         Python buffer object is needed.
 
@@ -788,6 +793,15 @@ def enqueue_copy(queue, dest, src, **kwargs):
     :arg region: :class:`tuple` of :class:`int` of length
         three or shorter. (mandatory)
 
+    .. ------------------------------------------------------------------------
+    .. rubric :: Transfer :class:`SVM`/host ↔ :class:`SVM`/host
+    .. ------------------------------------------------------------------------
+
+    :arg byte_count: (optional) If not specified, defaults to the
+        size of the source in versions 2012.x and earlier,
+        and to the minimum of the size of the source and target
+        from 2013.1 on.
+
     |std-enqueue-blurb|
 
     .. versionadded:: 2011.1
@@ -983,4 +997,153 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None):
 # }}}
 
 
+# {{{ numpy-like svm allocation
+
+def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None):
+    """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype*
+    and *order*. (See :func:`numpy.empty` for the meaning of these arguments.)
+    The array will be allocated in shared virtual memory belonging
+    to *ctx*.
+
+    :arg ctx: a :class:`Context`
+    :arg flags: a combination of flags from :class:`svm_mem_flags`.
+    :arg alignment: the number of bytes to which the beginning of the memory
+        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*.
+
+    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
+        is a :class:`SVMAllocation`.
+
+    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
+    will likely want to wrap the returned array in an :class:`SVM` tag.
+
+    .. versionadded:: 2016.2
+    """
+
+    dtype = np.dtype(dtype)
+    # A non-iterable *shape* (a single integer) is handled in the except branch.
+    try:
+        s = 1
+        for dim in shape:
+            s *= dim
+    except TypeError:
+        import sys
+        if sys.version_info >= (3,):
+            admissible_types = (int, np.integer)
+        else:
+            admissible_types = (np.integer,) + six.integer_types
+
+        if not isinstance(shape, admissible_types):
+            raise TypeError("shape must either be iterable or "
+                    "castable to an integer")
+        s = shape
+        shape = (shape,)
+
+    itemsize = dtype.itemsize
+    nbytes = s * itemsize
+
+    from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides
+
+    if order in "fF":
+        strides = f_contiguous_strides(itemsize, shape)
+    elif order in "cC":
+        strides = c_contiguous_strides(itemsize, shape)
+    else:
+        raise ValueError("order not recognized: %s" % order)
+    # More than one descr entry (presumably a structured dtype): opaque typestr.
+    descr = dtype.descr
+    if len(descr) == 1:
+        typestr = descr[0][1]
+    else:
+        typestr = "V%d" % itemsize
+
+    interface = {
+        "version": 3,
+        "shape": shape,
+        "typestr": typestr,
+        "descr": descr,
+        "strides": strides,
+        }
+
+    if alignment is None:
+        alignment = itemsize
+    # SVMAllocation publishes *interface* as its __array_interface__.
+    svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface)
+    return np.asarray(svm_alloc)
+
+
+def svm_empty_like(ctx, flags, ary, alignment=None):
+    """Allocate an empty :class:`numpy.ndarray` like the existing
+    :class:`numpy.ndarray` *ary*.  The array will be allocated in shared
+    virtual memory belonging to *ctx*.
+
+    :arg ctx: a :class:`Context`
+    :arg flags: a combination of flags from :class:`svm_mem_flags`.
+    :arg alignment: the number of bytes to which the beginning of the memory
+        is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *ary*'s dtype.
+
+    :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute
+        is a :class:`SVMAllocation`.
+    :raises ValueError: if *ary* is neither C- nor Fortran-contiguous.
+
+    To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you
+    will likely want to wrap the returned array in an :class:`SVM` tag.
+
+    .. versionadded:: 2016.2
+    """
+    if ary.flags.c_contiguous:
+        order = "C"
+    elif ary.flags.f_contiguous:
+        order = "F"
+    else:
+        raise ValueError("array is neither C- nor Fortran-contiguous")
+
+    return svm_empty(ctx, flags, ary.shape, ary.dtype, order, alignment=alignment)
+
+
+def csvm_empty(ctx, shape, dtype, order="C", alignment=None):
+    """Coarse-grain, read-write variant of :func:`svm_empty` (fixed *flags*).
+
+    .. versionadded:: 2016.2
+    """
+    coarse_rw = svm_mem_flags.READ_WRITE
+    return svm_empty(
+            ctx, coarse_rw, shape, dtype, order=order, alignment=alignment)
+
+
+def csvm_empty_like(ctx, ary, alignment=None):
+    """
+    Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain
+    read-write buffer. *alignment* is passed through unchanged.
+
+    .. versionadded:: 2016.2
+    """
+    return svm_empty_like(ctx, svm_mem_flags.READ_WRITE, ary, alignment)
+
+
+def fsvm_empty(ctx, shape, dtype, order="C", alignment=None):
+    """
+    Fine-grain, read-write variant of :func:`svm_empty`: *flags* is fixed,
+    all other arguments are passed through unchanged.
+
+    .. versionadded:: 2016.2
+    """
+    fine_rw = svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER
+    return svm_empty(
+            ctx, fine_rw, shape, dtype, order=order, alignment=alignment)
+
+
+def fsvm_empty_like(ctx, ary, alignment=None):
+    """
+    Like :func:`svm_empty_like`, but with *flags* set for a fine-grain
+    read-write buffer. *alignment* is passed through unchanged.
+
+    .. versionadded:: 2016.2
+    """
+    return svm_empty_like(
+            ctx,
+            svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
+            ary, alignment)
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py
index b4c104f0aeb74dc731763d540b9ba4c81d66943d..31df8050007d787cebc813e6b0774312d7dd6a13 100644
--- a/pyopencl/cffi_cl.py
+++ b/pyopencl/cffi_cl.py
@@ -804,7 +804,7 @@ class CommandQueue(_Common):
 # }}}
 
 
-# {{{ memory objects
+# {{{ _norm_shape_dtype and cffi_array
 
 def _norm_shape_dtype(shape, dtype, order="C", strides=None, name=""):
     dtype = np.dtype(dtype)
@@ -839,17 +839,10 @@ class cffi_array(np.ndarray):  # noqa
     def base(self):
         return self.__base
 
+# }}}
 
-class LocalMemory(_CLKernelArg):
-    __slots__ = ('_size',)
-
-    def __init__(self, size):
-        self._size = size
-
-    @property
-    def size(self):
-        return self._size
 
+# {{{ MemoryObjectHolder base class
 
 class MemoryObjectHolder(_Common, _CLKernelArg):
     def get_host_array(self, shape, dtype, order="C"):
@@ -867,6 +860,10 @@ class MemoryObjectHolder(_Common, _CLKernelArg):
                              "MemoryObjectHolder.get_host_array")
         return ary
 
+# }}}
+
+
+# {{{ MemoryObject
 
 class MemoryObject(MemoryObjectHolder):
     def __init__(self, hostbuf=None):
@@ -894,8 +891,18 @@ class MemoryObject(MemoryObjectHolder):
     def release(self):
         _handle_error(_lib.memory_object__release(self.ptr))
 
+# }}}
+
+
+# {{{ MemoryMap
 
 class MemoryMap(_Common):
+    """
+    .. automethod:: release
+
+    This class may also be used as a context manager in a ``with`` statement.
+    """
+
     @classmethod
     def _create(cls, ptr, shape, typestr, strides):
         self = _Common._create.__func__(cls, ptr)
@@ -908,6 +915,12 @@ class MemoryMap(_Common):
         }
         return self
 
+    def __enter__(self):
+        return self  # the map object itself is what ``with ... as`` binds
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.release()  # default arguments; the value release() returns is dropped
+
     def release(self, queue=None, wait_for=None):
         c_wait_for, num_wait_for = _clobj_list(wait_for)
         _event = _ffi.new('clobj_t*')
@@ -916,6 +929,8 @@ class MemoryMap(_Common):
             c_wait_for, num_wait_for, _event))
         return Event._create(_event[0])
 
+# }}}
+
 
 # {{{ _c_buffer_from_obj
 
@@ -960,8 +975,6 @@ elif sys.version_info >= (2, 7, 4):
         _ssize_t = ctypes.c_size_t
 
     def _c_buffer_from_obj(obj, writable=False, retain=False):
-        # {{{ fall back to the old CPython buffer protocol API
-
         # {{{ try the numpy array interface first
 
         # avoid slow ctypes-based buffer interface wrapper
@@ -976,6 +989,8 @@ elif sys.version_info >= (2, 7, 4):
 
         # }}}
 
+        # {{{ fall back to the old CPython buffer protocol API
+
         from pyopencl._buffers import Py_buffer, PyBUF_ANY_CONTIGUOUS, PyBUF_WRITABLE
 
         flags = PyBUF_ANY_CONTIGUOUS
@@ -1071,6 +1086,233 @@ class Buffer(MemoryObject):
 
 # }}}
 
+
+# {{{ SVMAllocation
+
+class SVMAllocation(object):
+    """An object whose lifetime is tied to an allocation of shared virtual memory.
+
+    .. note::
+
+        Most likely, you will not want to use this directly, but rather
+        :func:`svm_empty` and related functions which allow access to this
+        functionality using a friendlier, more Pythonic interface.
+
+    .. versionadded:: 2016.2
+
+    .. automethod:: __init__(self, ctx, size, alignment, flags)
+    .. automethod:: release
+    .. automethod:: enqueue_release
+    """
+    def __init__(self, ctx, size, alignment, flags, _interface=None):
+        """
+        :arg ctx: a :class:`Context`
+        :arg size: the size of the allocation, in bytes
+        :arg flags: some of :class:`svm_mem_flags`.
+        """
+        # Set first so __del__ finds the attribute even if allocation raises.
+        self.ptr = None
+
+        ptr = _ffi.new('void**')
+        _handle_error(_lib.svm_alloc(ctx.ptr, flags, size, alignment, ptr))
+
+        self.ctx = ctx
+        self.ptr = ptr[0]
+        self.is_fine_grain = flags & svm_mem_flags.SVM_FINE_GRAIN_BUFFER
+
+        if _interface is not None:
+            # The second entry of "data" is numpy's *read-only* flag.
+            read_write = flags & (mem_flags.WRITE_ONLY | mem_flags.READ_WRITE)
+            _interface["data"] = (
+                    int(_ffi.cast("intptr_t", self.ptr)), not read_write)
+            self.__array_interface__ = _interface
+
+    def __del__(self):
+        if self.ptr is not None:
+            self.release()
+
+    def release(self):
+        """Free the allocation via ``svm_free`` (no command queue involved)."""
+        _handle_error(_lib.svm_free(self.ctx.ptr, self.ptr))
+        self.ptr = None
+
+    def enqueue_release(self, queue, wait_for=None):
+        """
+        :arg queue: a :class:`pyopencl.CommandQueue`
+        :returns: a :class:`pyopencl.Event`
+
+        |std-enqueue-blurb|
+        """
+        ptr_event = _ffi.new('clobj_t*')
+        c_wait_for, num_wait_for = _clobj_list(wait_for)
+        _handle_error(_lib.enqueue_svm_free(
+            ptr_event, queue.ptr, 1, self.ptr,
+            c_wait_for, num_wait_for))
+
+        self.ctx = None
+        self.ptr = None
+
+        return Event._create(ptr_event[0])
+
+# }}}
+
+
+# {{{ SVM
+
+#TODO:
+# doc example
+# finish copy
+#  test
+# fill
+#  test
+# migrate
+
+class SVM(_CLKernelArg):
+    """Tags an object exhibiting the Python buffer interface (such as a
+    :class:`numpy.ndarray`) as referring to shared virtual memory.
+
+    Depending on the features of the OpenCL implementation, the following
+    types of objects may be passed to/wrapped in this type:
+
+    *   coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty`
+        for any implementation of OpenCL 2.0.
+
+    *   fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`,
+        if the implementation supports fine-grained shared virtual memory.
+
+    *   any :class:`numpy.ndarray` (or other Python object with a buffer
+        interface) if the implementation supports fine-grained *system* shared
+        virtual memory.
+
+    Objects of this type may be passed to kernel calls and :func:`enqueue_copy`.
+    Coarse-grain shared-memory *must* be mapped into host address space using
+    :meth:`map` before being accessed through the :mod:`numpy` interface.
+
+    .. note::
+
+        This object merely serves as a 'tag' that changes the meaning
+        of functions to which it is passed. It has no special management
+        relationship to the memory it tags. For example, it is permissible
+        to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one
+        :class:`SVM` instance and use the array to construct another.
+        Neither of the tags needs to be kept alive.
+
+    .. versionadded:: 2016.2
+
+    .. attribute:: mem
+
+        The wrapped object.
+
+    .. automethod:: __init__
+    .. automethod:: map
+    .. automethod:: as_buffer
+    """
+
+    def __init__(self, mem):
+        self.mem = mem
+
+    def map(self, queue, is_blocking=True, flags=None, wait_for=None):
+        """
+        :arg queue: a :class:`pyopencl.CommandQueue`
+        :arg is_blocking: If *False*, subsequent code must wait on
+            :attr:`SVMMap.event` in the returned object before accessing the
+            mapped memory.
+        :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to
+            read-write.
+        :returns: an :class:`SVMMap` instance
+
+        |std-enqueue-blurb|
+        """
+        if flags is None:
+            flags = map_flags.READ | map_flags.WRITE
+        # Ask for a writable buffer whenever the mapping may write to the memory.
+        c_buf, size, _ = _c_buffer_from_obj(self.mem, writable=bool(
+            flags & (map_flags.WRITE | map_flags.INVALIDATE_REGION)))
+
+        ptr_event = _ffi.new('clobj_t*')
+        c_wait_for, num_wait_for = _clobj_list(wait_for)
+        _handle_error(_lib.enqueue_svm_map(
+            ptr_event, queue.ptr, is_blocking, flags,
+            c_buf, size,
+            c_wait_for, num_wait_for))
+
+        evt = Event._create(ptr_event[0])
+        return SVMMap(self, queue, evt)
+
+    def _enqueue_unmap(self, queue, wait_for=None):
+        """Enqueue the unmap of :attr:`mem` on *queue* and return its event."""
+        c_buf, _, _ = _c_buffer_from_obj(self.mem)
+
+        ptr_event = _ffi.new('clobj_t*')
+        c_wait_for, num_wait_for = _clobj_list(wait_for)
+        _handle_error(_lib.enqueue_svm_unmap(
+            ptr_event, queue.ptr, c_buf, c_wait_for, num_wait_for))
+
+        return Event._create(ptr_event[0])
+
+    def as_buffer(self, ctx, flags=None):
+        """
+        :arg ctx: a :class:`Context`
+        :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to
+            read-write.
+        :returns: a :class:`Buffer` corresponding to *self*.
+
+        The memory referred to by this object must not be freed before
+        the returned :class:`Buffer` is released.
+        """
+
+        if flags is None:
+            flags = mem_flags.READ_WRITE
+
+        return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem)
+
+# }}}
+
+
+# {{{ SVMMap
+
+class SVMMap(_CLKernelArg):
+    """
+    .. attribute:: event
+
+    .. versionadded:: 2016.2
+
+    .. automethod:: release
+
+    This class may also be used as a context manager in a ``with`` statement.
+    :meth:`release` will be called upon exit from the ``with`` region.
+    The value returned to the ``as`` part of the context manager is the
+    mapped Python object (e.g. a :mod:`numpy` array).
+    """
+    def __init__(self, svm, queue, event):
+        self.svm = svm
+        self.queue = queue
+        self.event = event
+
+    def __del__(self):
+        if self.svm is not None:  # release() resets svm to None
+            self.release()
+
+    def __enter__(self):
+        return self.svm.mem
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.release()
+
+    def release(self, queue=None, wait_for=None):
+        """
+        :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one
+            with which the map was created, if not specified.
+        :returns: a :class:`pyopencl.Event`
+
+        |std-enqueue-blurb|
+        """
+        if queue is None:
+            queue = self.queue
+        evt = self.svm._enqueue_unmap(queue, wait_for)
+        self.svm = None
+        return evt
+
 # }}}
 
 
@@ -1246,6 +1488,17 @@ class _Program(_Common):
 # }}}
 
 
+class LocalMemory(_CLKernelArg):
+    """A kernel argument that is set as a NULL buffer of a given :attr:`size`."""
+    __slots__ = ('_size',)
+
+    def __init__(self, size):
+        self._size = size
+
+    # read-only view of the size given at construction
+    size = property(lambda self: self._size)
+
+
 # {{{ Kernel
 
 # {{{ arg packing helpers
@@ -1407,8 +1660,8 @@ class Kernel(_Common):
                 status = _lib.kernel__set_arg_null(self.ptr, {arg_idx})
                 if status != _ffi.NULL:
                     _handle_error(status)
-            elif isinstance({arg_var}, _CLKernelArg):
-                self.set_arg({arg_idx}, {arg_var})
+            else:
+                self._set_arg_clkernelarg({arg_idx}, {arg_var})
             """
             .format(arg_idx=arg_idx, arg_var=arg_var))
 
@@ -1588,19 +1841,30 @@ class Kernel(_Common):
         capture_kernel_call(self, filename, queue, global_size, local_size,
                 *args, **kwargs)
 
+    def _set_arg_clkernelarg(self, arg_index, arg):
+        """Set argument *arg_index* from *arg*, dispatching on its wrapper type."""
+        if isinstance(arg, MemoryObjectHolder):
+            _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr))
+        elif isinstance(arg, SVM):
+            c_buf, _, _ = _c_buffer_from_obj(arg.mem)
+            _handle_error(_lib.kernel__set_arg_svm_pointer(
+                self.ptr, arg_index, c_buf))
+        elif isinstance(arg, Sampler):
+            _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index,
+                                                       arg.ptr))
+        elif isinstance(arg, LocalMemory):
+            _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index,
+                                                   _ffi.NULL, arg.size))
+        else:
+            msg = "unexpected _CLKernelArg subclass: %s" % type(arg).__name__
+            raise RuntimeError(msg, status_code.INVALID_VALUE, "clSetKernelArg")
+
     def set_arg(self, arg_index, arg):
         # If you change this, also change the kernel call generation logic.
         if arg is None:
             _handle_error(_lib.kernel__set_arg_null(self.ptr, arg_index))
         elif isinstance(arg, _CLKernelArg):
-            if isinstance(arg, MemoryObjectHolder):
-                _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr))
-            elif isinstance(arg, Sampler):
-                _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index,
-                                                           arg.ptr))
-            elif isinstance(arg, LocalMemory):
-                _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index,
-                                                       _ffi.NULL, arg.size))
+            self._set_arg_clkernelarg(arg_index, arg)  # bound call: no extra self
         elif _CPY2 and isinstance(arg, np.generic):
             # https://github.com/numpy/numpy/issues/5381
             c_buf, size, _ = _c_buffer_from_obj(np.getbuffer(arg))
diff --git a/setup.py b/setup.py
index ba00bb944188617e8df40e6e93c7132e25dcb7cd..1d735904cc533f3f39405bd1a9d8285b3f2a59e3 100644
--- a/setup.py
+++ b/setup.py
@@ -72,11 +72,11 @@ def get_config_schema():
     return ConfigSchema([
         Switch("CL_TRACE", False, "Enable OpenCL API tracing"),
         Switch("CL_ENABLE_GL", False, "Enable OpenCL<->OpenGL interoperability"),
-        Switch("CL_USE_SHIPPED_EXT", True,
+        Switch(
+            "CL_USE_SHIPPED_EXT", True,
             "Use the pyopencl version of CL/cl_ext.h which includes" +
             " a broader range of vendor-specific OpenCL extension attributes" +
-            " than the standard Khronos (or vendor specific) CL/cl_ext.h."
-        ),
+            " than the standard Khronos (or vendor specific) CL/cl_ext.h."),
         Option("CL_PRETEND_VERSION", None,
             "Dotted CL version (e.g. 1.2) which you'd like to use."),
 
diff --git a/src/c_wrapper/kernel.cpp b/src/c_wrapper/kernel.cpp
index e787ff8e93a0b5adbc0d205fea1916ef3aa1cc51..817e10619727ae7d060c40d3096b637ec5923629 100644
--- a/src/c_wrapper/kernel.cpp
+++ b/src/c_wrapper/kernel.cpp
@@ -144,6 +144,19 @@ kernel__set_arg_buf(clobj_t _knl, cl_uint arg_index,
         });
 }
 
+error*
+kernel__set_arg_svm_pointer(clobj_t _knl, cl_uint arg_index, void *value)
+{
+#if PYOPENCL_CL_VERSION >= 0x2000  // only compiled in for CL 2.0+ builds
+    auto knl = static_cast<kernel*>(_knl);  // _knl is the opaque kernel handle
+    return c_handle_error([&] {  // presumably turns failures into the error*
+            pyopencl_call_guarded(clSetKernelArgSVMPointer, knl, arg_index, value);
+        });
+#else
+    PYOPENCL_UNSUPPORTED_BEFORE(clSetKernelArgSVMPointer, "CL 2.0")
+#endif
+}
+
 error*
 kernel__get_work_group_info(clobj_t _knl, cl_kernel_work_group_info param,
                             clobj_t _dev, generic_info *out)
diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h
index 8ef6f2e8066b4b50e938ee67dc0df1191dc9e4e6..7a4992f478514778dd082c1071116ed32810f13a 100644
--- a/src/c_wrapper/wrap_cl_core.h
+++ b/src/c_wrapper/wrap_cl_core.h
@@ -211,6 +211,7 @@ error *kernel__set_arg_sampler(clobj_t kernel, cl_uint arg_index,
                                clobj_t sampler);
 error *kernel__set_arg_buf(clobj_t kernel, cl_uint arg_index,
                            const void *buffer, size_t size);
+error *kernel__set_arg_svm_pointer(clobj_t kernel, cl_uint arg_index, void *value);
 error *kernel__get_work_group_info(clobj_t kernel,
                                    cl_kernel_work_group_info param,
                                    clobj_t device, generic_info *out);
diff --git a/test/test_wrapper.py b/test/test_wrapper.py
index 903da75813f097d5db338430c75db0f988c0d66a..43624afeef9ac5d8ca51bcbbd2d6df00ac92ad0b 100644
--- a/test/test_wrapper.py
+++ b/test/test_wrapper.py
@@ -914,7 +914,7 @@ def test_spirv(ctx_factory):
     if (ctx._get_cl_version() < (2, 1) or
             cl.get_cl_header_version() < (2, 1)):
         from pytest import skip
-        skip("SPIR-V program creation only available in OpenCL 2.1")
+        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")
 
     n = 50000
 
@@ -932,6 +932,32 @@ def test_spirv(ctx_factory):
     assert la.norm((dest_dev - (a_dev+b_dev)).get()) < 1e-7
 
 
+def test_coarse_grain_svm(ctx_factory):
+    ctx = ctx_factory()
+
+    if (ctx._get_cl_version() < (2, 0) or
+            cl.get_cl_header_version() < (2, 0)):
+        from pytest import skip
+        skip("SVM only available in OpenCL 2.0 and higher")
+
+    svm_ary = cl.csvm_empty(ctx, (100, 100), np.float32, alignment=64)
+    assert isinstance(svm_ary.base, cl.SVMAllocation)
+    assert svm_ary.shape == (100, 100) and svm_ary.dtype == np.float32
+
+
+def test_fine_grain_svm(ctx_factory):
+    ctx = ctx_factory()
+
+    if (ctx._get_cl_version() < (2, 0) or
+            cl.get_cl_header_version() < (2, 0)):
+        from pytest import skip
+        skip("SVM only available in OpenCL 2.0 and higher")
+
+    svm_ary = cl.fsvm_empty(ctx, (100, 100), np.float32, alignment=64)
+    assert isinstance(svm_ary.base, cl.SVMAllocation)
+    assert svm_ary.shape == (100, 100) and svm_ary.dtype == np.float32
+
+
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
     import pyopencl  # noqa