From 3f8d844ba099017d93ae17cfb6dc15d0dd0040e5 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Thu, 4 Aug 2016 22:16:06 -0500 Subject: [PATCH] A first cut of SVM support --- doc/runtime_memory.rst | 43 +++-- pyopencl/__init__.py | 165 ++++++++++++++++++- pyopencl/cffi_cl.py | 308 ++++++++++++++++++++++++++++++++--- setup.py | 6 +- src/c_wrapper/kernel.cpp | 13 ++ src/c_wrapper/wrap_cl_core.h | 1 + test/test_wrapper.py | 28 +++- 7 files changed, 522 insertions(+), 42 deletions(-) diff --git a/doc/runtime_memory.rst b/doc/runtime_memory.rst index 3f85a6ca..3816abaa 100644 --- a/doc/runtime_memory.rst +++ b/doc/runtime_memory.rst @@ -33,7 +33,7 @@ OpenCL Runtime: Memory |comparable| Memory Migration -^^^^^^^^^^^^^^^^ +---------------- .. function:: enqueue_migrate_mem_objects(queue, mem_objects, flags=0, wait_for=None) @@ -52,8 +52,8 @@ Memory Migration Only available with the `cl_ext_migrate_memobject` extension. -Buffers -^^^^^^^ +Buffer +------ .. class:: Buffer(context, flags, size=0, hostbuf=None) @@ -107,24 +107,39 @@ Buffers .. _svm: -Shared Virtual Memory -^^^^^^^^^^^^^^^^^^^^^ +Shared Virtual Memory (SVM) +--------------------------- -Shared virtual memory requires OpenCL 2.0. +Shared virtual memory allows the host and the compute device to share +address space, so that pointers on the host and on the device may have +the same meaning. In addition, it allows the same memory to be accessed +by both the host and the device. *Coarse-grain* SVM requires that +buffers be mapped before being accessed on the host, *fine-grain* SVM +does away with that requirement. -.. autoclass:: SVMAllocation +SVM requires OpenCL 2.0. .. autoclass:: SVM .. autoclass:: SVMMap +Allocating SVM +^^^^^^^^^^^^^^ + +.. autofunction:: svm_empty +.. autofunction:: svm_empty_like .. autofunction:: csvm_empty -.. autofunction:: fsvm_empty .. autofunction:: csvm_empty_like +.. autofunction:: fsvm_empty .. 
autofunction:: fsvm_empty_like -Image Formats -^^^^^^^^^^^^^ +SVM Allocation Holder +^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: SVMAllocation + +Image +----- .. class:: ImageFormat([channel_order, channel_type]) @@ -169,8 +184,6 @@ Image Formats See :class:`mem_flags` for possible values of *flags* and :class:`mem_object_type` for possible values of *image_type*. -Images -^^^^^^ .. class:: Image(context, flags, format, shape=None, pitches=None, hostbuf=None, is_array=False, buffer=None): See :class:`mem_flags` for values of *flags*. @@ -255,12 +268,12 @@ Images .. versionadded:: 2011.2 Transfers -^^^^^^^^^ +--------- .. autofunction:: enqueue_copy(queue, dest, src, **kwargs) Mapping Memory into Host Address Space -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +-------------------------------------- .. autoclass:: MemoryMap @@ -309,7 +322,7 @@ Mapping Memory into Host Address Space Added *strides* argument. Samplers -^^^^^^^^ +-------- .. class:: Sampler(context, normalized_coords, addressing_mode, filter_mode) diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 9100c844..166601f2 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -112,6 +112,10 @@ from pyopencl.cffi_cl import ( # noqa MemoryObject, MemoryMap, Buffer, + SVMAllocation, + SVM, + SVMMap, + CompilerWarning, _Program, Kernel, @@ -695,7 +699,8 @@ def enqueue_copy(queue, dest, src, **kwargs): Two types of 'buffer' occur in the arguments to this function, :class:`Buffer` and 'host-side buffers'. The latter are defined by Python and commonly called `buffer objects - <https://docs.python.org/3.4/c-api/buffer.html>`_. + <https://docs.python.org/3.4/c-api/buffer.html>`_. :mod:`numpy` + arrays are a very common example. Make sure to always be clear on whether a :class:`Buffer` or a Python buffer object is needed. @@ -788,6 +793,15 @@ def enqueue_copy(queue, dest, src, **kwargs): :arg region: :class:`tuple` of :class:`int` of length three or shorter. (mandatory) + .. 
------------------------------------------------------------------------ + .. rubric :: Transfer :class:`SVM`/host ↔ :class:`SVM`/host + .. ------------------------------------------------------------------------ + + :arg byte_count: (optional) If not specified, defaults to the + size of the source in versions 2012.x and earlier, + and to the minimum of the size of the source and target + from 2013.1 on. + |std-enqueue-blurb| .. versionadded:: 2011.1 @@ -983,4 +997,153 @@ def enqueue_fill_buffer(queue, mem, pattern, offset, size, wait_for=None): # }}} +# {{{ numpy-like svm allocation + +def svm_empty(ctx, flags, shape, dtype, order="C", alignment=None): + """Allocate an empty :class:`numpy.ndarray` of the given *shape*, *dtype* + and *order*. (See :func:`numpy.empty` for the meaning of these arguments.) + The array will be allocated in shared virtual memory belonging + to *ctx*. + + :arg ctx: a :class:`Context` + :arg flags: a combination of flags from :class:`svm_mem_flags`. + :arg alignment: the number of bytes to which the beginning of the memory + is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*. + + :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute + is a :class:`SVMAllocation`. + + To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you + will likely want to wrap the returned array in an :class:`SVM` tag. + + .. 
versionadded:: 2016.2 + """ + + dtype = np.dtype(dtype) + + try: + s = 1 + for dim in shape: + s *= dim + except TypeError: + import sys + if sys.version_info >= (3,): + admissible_types = (int, np.integer) + else: + admissible_types = (np.integer,) + six.integer_types + + if not isinstance(shape, admissible_types): + raise TypeError("shape must either be iterable or " + "castable to an integer") + s = shape + shape = (shape,) + + itemsize = dtype.itemsize + nbytes = s * itemsize + + from pyopencl.compyte.array import c_contiguous_strides, f_contiguous_strides + + if order in "fF": + strides = f_contiguous_strides(itemsize, shape) + elif order in "cC": + strides = c_contiguous_strides(itemsize, shape) + else: + raise ValueError("order not recognized: %s" % order) + + descr = dtype.descr + if len(descr) == 1: + typestr = descr[0][1] + else: + typestr = "V%d" % itemsize + + interface = { + "version": 3, + "shape": shape, + "typestr": typestr, + "descr": descr, + "strides": strides, + } + + if alignment is None: + alignment = itemsize + + svm_alloc = SVMAllocation(ctx, nbytes, alignment, flags, _interface=interface) + return np.asarray(svm_alloc) + + +def svm_empty_like(ctx, flags, ary, alignment=None): + """Allocate an empty :class:`numpy.ndarray` like the existing + :class:`numpy.ndarray` *ary*. The array will be allocated in shared + virtual memory belonging to *ctx*. + + :arg ctx: a :class:`Context` + :arg flags: a combination of flags from :class:`svm_mem_flags`. + :arg alignment: the number of bytes to which the beginning of the memory + is aligned. Defaults to the :attr:`numpy.dtype.itemsize` of *dtype*. + + :returns: a :class:`numpy.ndarray` whose :attr:`numpy.ndarray.base` attribute + is a :class:`SVMAllocation`. + + To pass the resulting array to an OpenCL kernel or :func:`enqueue_copy`, you + will likely want to wrap the returned array in an :class:`SVM` tag. + + .. 
versionadded:: 2016.2 + """ + if ary.flags.c_contiguous: + order = "C" + elif ary.flags.f_contiguous: + order = "F" + else: + raise ValueError("array is neither C- nor Fortran-contiguous") + + return svm_empty(ctx, flags, ary.shape, ary.dtype, order, + alignment=alignment) + + +def csvm_empty(ctx, shape, dtype, order="C", alignment=None): + """ + Like :func:`svm_empty`, but with *flags* set for a coarse-grain read-write + buffer. + + .. versionadded:: 2016.2 + """ + return svm_empty(ctx, svm_mem_flags.READ_WRITE, shape, dtype, order, alignment) + + +def csvm_empty_like(ctx, ary, alignment=None): + """ + Like :func:`svm_empty_like`, but with *flags* set for a coarse-grain + read-write buffer. + + .. versionadded:: 2016.2 + """ + return svm_empty_like(ctx, svm_mem_flags.READ_WRITE, ary, alignment=alignment) + + +def fsvm_empty(ctx, shape, dtype, order="C", alignment=None): + """ + Like :func:`svm_empty`, but with *flags* set for a fine-grain read-write + buffer. + + .. versionadded:: 2016.2 + """ + return svm_empty(ctx, + svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER, + shape, dtype, order, alignment) + + +def fsvm_empty_like(ctx, ary, alignment=None): + """ + Like :func:`svm_empty_like`, but with *flags* set for a fine-grain + read-write buffer. + + .. 
versionadded:: 2016.2 + """ + return svm_empty_like( + ctx, + svm_mem_flags.READ_WRITE | svm_mem_flags.SVM_FINE_GRAIN_BUFFER, + ary, alignment=alignment) + +# }}} + # vim: foldmethod=marker diff --git a/pyopencl/cffi_cl.py b/pyopencl/cffi_cl.py index b4c104f0..31df8050 100644 --- a/pyopencl/cffi_cl.py +++ b/pyopencl/cffi_cl.py @@ -804,7 +804,7 @@ class CommandQueue(_Common): # }}} -# {{{ memory objects +# {{{ _norm_shape_dtype and cffi_array def _norm_shape_dtype(shape, dtype, order="C", strides=None, name=""): dtype = np.dtype(dtype) @@ -839,17 +839,10 @@ class cffi_array(np.ndarray): # noqa def base(self): return self.__base +# }}} -class LocalMemory(_CLKernelArg): - __slots__ = ('_size',) - - def __init__(self, size): - self._size = size - - @property - def size(self): - return self._size +# {{{ MemoryObjectHolder base class class MemoryObjectHolder(_Common, _CLKernelArg): def get_host_array(self, shape, dtype, order="C"): @@ -867,6 +860,10 @@ class MemoryObjectHolder(_Common, _CLKernelArg): "MemoryObjectHolder.get_host_array") return ary +# }}} + + +# {{{ MemoryObject class MemoryObject(MemoryObjectHolder): def __init__(self, hostbuf=None): @@ -894,8 +891,18 @@ class MemoryObject(MemoryObjectHolder): def release(self): _handle_error(_lib.memory_object__release(self.ptr)) +# }}} + + +# {{{ MemoryMap class MemoryMap(_Common): + """ + .. automethod:: release + + This class may also be used as a context manager in a ``with`` statement. 
+ """ + @classmethod def _create(cls, ptr, shape, typestr, strides): self = _Common._create.__func__(cls, ptr) @@ -908,6 +915,12 @@ class MemoryMap(_Common): } return self + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.release() + def release(self, queue=None, wait_for=None): c_wait_for, num_wait_for = _clobj_list(wait_for) _event = _ffi.new('clobj_t*') @@ -916,6 +929,8 @@ class MemoryMap(_Common): c_wait_for, num_wait_for, _event)) return Event._create(_event[0]) +# }}} + # {{{ _c_buffer_from_obj @@ -960,8 +975,6 @@ elif sys.version_info >= (2, 7, 4): _ssize_t = ctypes.c_size_t def _c_buffer_from_obj(obj, writable=False, retain=False): - # {{{ fall back to the old CPython buffer protocol API - # {{{ try the numpy array interface first # avoid slow ctypes-based buffer interface wrapper @@ -976,6 +989,8 @@ elif sys.version_info >= (2, 7, 4): # }}} + # {{{ fall back to the old CPython buffer protocol API + from pyopencl._buffers import Py_buffer, PyBUF_ANY_CONTIGUOUS, PyBUF_WRITABLE flags = PyBUF_ANY_CONTIGUOUS @@ -1071,6 +1086,233 @@ class Buffer(MemoryObject): # }}} + +# {{{ SVMAllocation + +class SVMAllocation(object): + """An object whose lifetime is tied to an allocation of shared virtual memory. + + .. note:: + + Most likely, you will not want to use this directly, but rather + :func:`svm_empty` and related functions which allow access to this + functionality using a friendlier, more Pythonic interface. + + .. versionadded:: 2016.2 + + .. automethod:: __init__(self, ctx, size, alignment, flags=None) + .. automethod:: release + .. automethod:: enqueue_release + """ + def __init__(self, ctx, size, alignment, flags, _interface=None): + """ + :arg ctx: a :class:`Context` + :arg flags: some of :class:`svm_mem_flags`. 
+ """ + + self.ptr = None + + ptr = _ffi.new('void**') + _handle_error(_lib.svm_alloc( + ctx.ptr, flags, size, alignment, + ptr)) + + self.ctx = ctx + self.ptr = ptr[0] + self.is_fine_grain = flags & svm_mem_flags.SVM_FINE_GRAIN_BUFFER + + if _interface is not None: + _interface["data"] = ( + int(_ffi.cast("intptr_t", self.ptr)), + not (flags & mem_flags.WRITE_ONLY != 0 + or flags & mem_flags.READ_WRITE != 0)) + self.__array_interface__ = _interface + + def __del__(self): + if self.ptr is not None: + self.release() + + def release(self): + _handle_error(_lib.svm_free(self.ctx.ptr, self.ptr)) + self.ptr = None + + def enqueue_release(self, queue, wait_for=None): + """ + :arg queue: a :class:`pyopencl.CommandQueue` + :returns: a :class:`pyopencl.Event` + + |std-enqueue-blurb| + """ + ptr_event = _ffi.new('clobj_t*') + c_wait_for, num_wait_for = _clobj_list(wait_for) + _handle_error(_lib.enqueue_svm_free( + ptr_event, queue.ptr, 1, self.ptr, + c_wait_for, num_wait_for)) + + self.ctx = None + self.ptr = None + + return Event._create(ptr_event[0]) + +# }}} + + +# {{{ SVM + +#TODO: +# doc example +# finish copy +# test +# fill +# test +# migrate + +class SVM(_CLKernelArg): + """Tags an object exhibiting the Python buffer interface (such as a + :class:`numpy.ndarray`) as referring to shared virtual memory. + + Depending on the features of the OpenCL implementation, the following + types of objects may be passed to/wrapped in this type: + + * coarse-grain shared memory as returned by (e.g.) :func:`csvm_empty` + for any implementation of OpenCL 2.0. + + * fine-grain shared memory as returned by (e.g.) :func:`fsvm_empty`, + if the implementation supports fine-grained shared virtual memory. + + * any :class:`numpy.ndarray` (or other Python object with a buffer + interface) if the implementation supports fine-grained *system* shared + virtual memory. + + Objects of this type may be passed to kernel calls and :func:`enqueue_copy`. 
+ Coarse-grain shared-memory *must* be mapped into host address space using + :meth:`map` before being accessed through the :mod:`numpy` interface. + + .. note:: + + This object merely serves as a 'tag' that changes the meaning + of functions to which it is passed. It has no special management + relationship to the memory it tags. For example, it is permissible + to grab a :class:`numpy.ndarray` out of :attr:`SVM.mem` of one + :class:`SVM` instance and use the array to construct another. + Neither of the tags needs to be kept alive. + + .. versionadded:: 2016.2 + + .. attribute:: mem + + The wrapped object. + + .. automethod:: __init__ + .. automethod:: map + .. automethod:: as_buffer + """ + + def __init__(self, mem): + self.mem = mem + + def map(self, queue, is_blocking=True, flags=None, wait_for=None): + """ + :arg is_blocking: If *False*, subsequent code must wait on + :attr:`SVMMap.event` in the returned object before accessing the + mapped memory. + :arg flags: a combination of :class:`pyopencl.map_flags`, defaults to + read-write. 
+ :returns: an :class:`SVMMap` instance + + |std-enqueue-blurb| + """ + if flags is None: + flags = map_flags.READ | map_flags.WRITE + + c_buf, size, _ = _c_buffer_from_obj(self.mem, writable=bool( + flags & (map_flags.WRITE | map_flags.WRITE_INVALIDATE_REGION))) + + ptr_event = _ffi.new('clobj_t*') + c_wait_for, num_wait_for = _clobj_list(wait_for) + _handle_error(_lib.enqueue_svm_map( + ptr_event, queue.ptr, is_blocking, flags, + c_buf, size, + c_wait_for, num_wait_for)) + + evt = Event._create(ptr_event[0]) + return SVMMap(self, queue, evt) + + def _enqueue_unmap(self, queue, wait_for=None): + c_buf, _, _ = _c_buffer_from_obj(self.mem) + + ptr_event = _ffi.new('clobj_t*') + c_wait_for, num_wait_for = _clobj_list(wait_for) + _handle_error(_lib.enqueue_svm_unmap( + ptr_event, queue.ptr, + c_buf, + c_wait_for, num_wait_for)) + + return Event._create(ptr_event[0]) + + def as_buffer(self, ctx, flags=None): + """ + :arg ctx: a :class:`Context` + :arg flags: a combination of :class:`pyopencl.mem_flags`, defaults to + read-write. + :returns: a :class:`Buffer` corresponding to *self*. + + The memory referred to by this object must not be freed before + the returned :class:`Buffer` is released. + """ + + if flags is None: + flags = mem_flags.READ_WRITE + + return Buffer(ctx, flags, size=self.mem.nbytes, hostbuf=self.mem) + +# }}} + + +# {{{ SVMMap + +class SVMMap(_CLKernelArg): + """ + .. attribute:: event + + .. versionadded:: 2016.2 + + .. automethod:: release + + This class may also be used as a context manager in a ``with`` statement. + :meth:`release` will be called upon exit from the ``with`` region. + The value returned to the ``as`` part of the context manager is the + mapped Python object (e.g. a :mod:`numpy` array). 
+ """ + def __init__(self, svm, queue, event): + self.svm = svm + self.queue = queue + self.event = event + + def __del__(self): + if self.svm is not None: + self.release() + + def __enter__(self): + return self.svm.mem + + def __exit__(self, exc_type, exc_val, exc_tb): + self.release() + + def release(self, queue=None, wait_for=None): + """ + :arg queue: a :class:`pyopencl.CommandQueue`. Defaults to the one + with which the map was created, if not specified. + :returns: a :class:`pyopencl.Event` + + |std-enqueue-blurb| + """ + queue = self.queue if queue is None else queue + evt = self.svm._enqueue_unmap(queue, wait_for) + self.svm = None + + return evt + # }}} @@ -1246,6 +1488,17 @@ class _Program(_Common): # }}} +class LocalMemory(_CLKernelArg): + __slots__ = ('_size',) + + def __init__(self, size): + self._size = size + + @property + def size(self): + return self._size + + # {{{ Kernel # {{{ arg packing helpers @@ -1407,8 +1660,8 @@ class Kernel(_Common): status = _lib.kernel__set_arg_null(self.ptr, {arg_idx}) if status != _ffi.NULL: _handle_error(status) - elif isinstance({arg_var}, _CLKernelArg): - self.set_arg({arg_idx}, {arg_var}) + else: + self._set_arg_clkernelarg({arg_idx}, {arg_var}) """ .format(arg_idx=arg_idx, arg_var=arg_var)) @@ -1588,19 +1841,30 @@ class Kernel(_Common): capture_kernel_call(self, filename, queue, global_size, local_size, *args, **kwargs) + def _set_arg_clkernelarg(self, arg_index, arg): + if isinstance(arg, MemoryObjectHolder): + _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr)) + elif isinstance(arg, SVM): + c_buf, _, _ = _c_buffer_from_obj(arg.mem) + _handle_error(_lib.kernel__set_arg_svm_pointer( + self.ptr, arg_index, c_buf)) + elif isinstance(arg, Sampler): + _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index, + arg.ptr)) + elif isinstance(arg, LocalMemory): + _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, + _ffi.NULL, arg.size)) + else: + raise RuntimeError("unexpected _CLKernelArg subclass: " + "%s" % type(arg).__name__, 
status_code.INVALID_VALUE, + "clSetKernelArg") + def set_arg(self, arg_index, arg): # If you change this, also change the kernel call generation logic. if arg is None: _handle_error(_lib.kernel__set_arg_null(self.ptr, arg_index)) elif isinstance(arg, _CLKernelArg): - if isinstance(arg, MemoryObjectHolder): - _handle_error(_lib.kernel__set_arg_mem(self.ptr, arg_index, arg.ptr)) - elif isinstance(arg, Sampler): - _handle_error(_lib.kernel__set_arg_sampler(self.ptr, arg_index, - arg.ptr)) - elif isinstance(arg, LocalMemory): - _handle_error(_lib.kernel__set_arg_buf(self.ptr, arg_index, - _ffi.NULL, arg.size)) + self._set_arg_clkernelarg(arg_index, arg) elif _CPY2 and isinstance(arg, np.generic): # https://github.com/numpy/numpy/issues/5381 c_buf, size, _ = _c_buffer_from_obj(np.getbuffer(arg)) diff --git a/setup.py b/setup.py index ba00bb94..1d735904 100644 --- a/setup.py +++ b/setup.py @@ -72,11 +72,11 @@ def get_config_schema(): return ConfigSchema([ Switch("CL_TRACE", False, "Enable OpenCL API tracing"), Switch("CL_ENABLE_GL", False, "Enable OpenCL<->OpenGL interoperability"), - Switch("CL_USE_SHIPPED_EXT", True, + Switch( + "CL_USE_SHIPPED_EXT", True, "Use the pyopencl version of CL/cl_ext.h which includes" + " a broader range of vendor-specific OpenCL extension attributes" + - " than the standard Khronos (or vendor specific) CL/cl_ext.h." - ), + " than the standard Khronos (or vendor specific) CL/cl_ext.h."), Option("CL_PRETEND_VERSION", None, "Dotted CL version (e.g. 
1.2) which you'd like to use."), diff --git a/src/c_wrapper/kernel.cpp b/src/c_wrapper/kernel.cpp index e787ff8e..817e1061 100644 --- a/src/c_wrapper/kernel.cpp +++ b/src/c_wrapper/kernel.cpp @@ -144,6 +144,19 @@ kernel__set_arg_buf(clobj_t _knl, cl_uint arg_index, }); } +error* +kernel__set_arg_svm_pointer(clobj_t _knl, cl_uint arg_index, void *value) +{ +#if PYOPENCL_CL_VERSION >= 0x2000 + auto knl = static_cast<kernel*>(_knl); + return c_handle_error([&] { + pyopencl_call_guarded(clSetKernelArgSVMPointer, knl, arg_index, value); + }); +#else + PYOPENCL_UNSUPPORTED_BEFORE(clSetKernelArgSVMPointer, "CL 2.0") +#endif +} + error* kernel__get_work_group_info(clobj_t _knl, cl_kernel_work_group_info param, clobj_t _dev, generic_info *out) diff --git a/src/c_wrapper/wrap_cl_core.h b/src/c_wrapper/wrap_cl_core.h index 8ef6f2e8..7a4992f4 100644 --- a/src/c_wrapper/wrap_cl_core.h +++ b/src/c_wrapper/wrap_cl_core.h @@ -211,6 +211,7 @@ error *kernel__set_arg_sampler(clobj_t kernel, cl_uint arg_index, clobj_t sampler); error *kernel__set_arg_buf(clobj_t kernel, cl_uint arg_index, const void *buffer, size_t size); +error *kernel__set_arg_svm_pointer(clobj_t kernel, cl_uint arg_index, void *value); error *kernel__get_work_group_info(clobj_t kernel, cl_kernel_work_group_info param, clobj_t device, generic_info *out); diff --git a/test/test_wrapper.py b/test/test_wrapper.py index 903da758..43624afe 100644 --- a/test/test_wrapper.py +++ b/test/test_wrapper.py @@ -914,7 +914,7 @@ def test_spirv(ctx_factory): if (ctx._get_cl_version() < (2, 1) or cl.get_cl_header_version() < (2, 1)): from pytest import skip - skip("SPIR-V program creation only available in OpenCL 2.1") + skip("SPIR-V program creation only available in OpenCL 2.1 and higher") n = 50000 @@ -932,6 +932,32 @@ def test_spirv(ctx_factory): assert la.norm((dest_dev - (a_dev+b_dev)).get()) < 1e-7 +def test_coarse_grain_svm(ctx_factory): + ctx = ctx_factory() + # queue = cl.CommandQueue(ctx) + + if (ctx._get_cl_version() < 
(2, 0) or + cl.get_cl_header_version() < (2, 0)): + from pytest import skip + skip("SVM only available in OpenCL 2.0 and higher") + + svm_ary = cl.csvm_empty(ctx, (100, 100), np.float32, alignment=64) + assert isinstance(svm_ary.base, cl.SVMAllocation) + + +def test_fine_grain_svm(ctx_factory): + ctx = ctx_factory() + # queue = cl.CommandQueue(ctx) + + if (ctx._get_cl_version() < (2, 0) or + cl.get_cl_header_version() < (2, 0)): + from pytest import skip + skip("SVM only available in OpenCL 2.0 and higher") + + svm_ary = cl.fsvm_empty(ctx, (100, 100), np.float32, alignment=64) + assert isinstance(svm_ary.base, cl.SVMAllocation) + + if __name__ == "__main__": # make sure that import failures get reported, instead of skipping the tests. import pyopencl # noqa -- GitLab