Skip to content
Snippets Groups Projects 44.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • Andreas Klöckner's avatar
    Andreas Klöckner committed
        a_dev = clrand(queue, (5, 15, 20), dtype=np.float32)
        b_dev = clrand(queue, (4, 15, 20), dtype=np.float32)
        c_dev = clrand(queue, (3, 15, 20), dtype=np.float32)
        a = a_dev.get()
        b = b_dev.get()
        c = c_dev.get()
        cat_dev = cl.array.concatenate((a_dev, b_dev, c_dev))
        cat = np.concatenate((a, b, c))
        assert la.norm(cat - cat_dev.get()) == 0
    # }}}
    # {{{ conditionals, any, all
    def test_comparisons(ctx_factory):
        """Check that device-side comparison operators agree with NumPy.

        Each operator is exercised for array/array, array/scalar and
        scalar/array operands, and once more on the (boolean-valued) result
        of a previous comparison.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        ary_len = 20000
        a_dev = clrand(queue, (ary_len,), dtype=np.float32)
        b_dev = clrand(queue, (ary_len,), dtype=np.float32)

        a = a_dev.get()
        b = b_dev.get()

        import operator as o
        for op in [o.eq, o.ne, o.le, o.lt, o.ge, o.gt]:
            # array <op> array
            res_dev = op(a_dev, b_dev)
            res = op(a, b)
            assert (res_dev.get() == res).all()

            # array <op> scalar
            res_dev = op(a_dev, 0)
            res = op(a, 0)
            assert (res_dev.get() == res).all()

            # scalar <op> array (dispatches to the reflected operator)
            res_dev = op(0, b_dev)
            res = op(0, b)
            assert (res_dev.get() == res).all()

            # scalar <op> comparison result
            res2_dev = op(0, res_dev)
            res2 = op(0, res)
            assert (res2_dev.get() == res2).all()
    def test_any_all(ctx_factory):
        """Check ``any()``/``all()`` on all-zero, mixed and all-one arrays."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        ary_len = 20000
        a_dev = cl_array.zeros(queue, (ary_len,), dtype=np.int8)

        # All entries zero: neither reduction may be true.
        assert not a_dev.all().get()
        assert not a_dev.any().get()

        # One nonzero entry: any() becomes true, all() stays false.
        a_dev[15213] = 1

        assert not a_dev.all().get()
        assert a_dev.any().get()

        # Every entry nonzero: both reductions are true.
        a_dev.fill(1)

        assert a_dev.all().get()
        assert a_dev.any().get()
    def test_map_to_host(ctx_factory):
        """Check that writes through a host mapping and device writes agree.

        One element is written on the device before mapping and another is
        written through the mapped host view; afterwards the saved host copy
        must equal what the device reports.
        """
        if _PYPY:
            pytest.skip("numpypy: no array creation from __array_interface__")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        if context.devices[0].type & cl.device_type.GPU:
            mf = cl.mem_flags
            allocator = cl_tools.DeferredAllocator(
                    context, mf.READ_WRITE | mf.ALLOC_HOST_PTR)
        else:
            # Without this branch the GPU allocator above would always be
            # overwritten.
            allocator = None

        a_dev = cl_array.zeros(queue, (5, 6, 7,), dtype=np.float32,
                allocator=allocator)

        # Device-side write; reported below as HOST[DEV_WRITE].
        a_dev[3, 2, 1] = 10
        a_host = a_dev.map_to_host()
        # Host-side write through the mapping; reported as DEV[HOST_WRITE].
        a_host[1, 2, 3] = 10

        a_host_saved = a_host.copy()
        # NOTE(review): unmaps the buffer so the host write becomes visible to
        # the device; assumes the mapping object is reachable as
        # ``a_host.base.base`` -- confirm against Array.map_to_host.
        a_host.base.base.release(queue)

        a_dev.finish()

        print("DEV[HOST_WRITE]", a_dev.get()[1, 2, 3])
        print("HOST[DEV_WRITE]", a_host_saved[3, 2, 1])

        assert (a_host_saved == a_dev.get()).all()
    def test_view_and_strides(ctx_factory):
        """Check that ``view()`` of a sliced array keeps shape and strides.

        The final comparison of the view's data against the NumPy slice is
        wrapped in ``pytest.raises(AssertionError)``, i.e. the data mismatch
        is currently the expected outcome.
        """
        if _PYPY:
            pytest.xfail("numpypy: no array creation from __array_interface__")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        x = clrand(queue, (5, 10), dtype=np.float32)
        y = x[:3, :5]
        yv = y.view()

        assert yv.shape == y.shape
        assert yv.strides == y.strides

        with pytest.raises(AssertionError):
            assert (yv.get() == x.get()[:3, :5]).all()
    def test_meshmode_view(ctx_factory):
        """Check filling through a sliced-and-reshaped view of a device array.

        The trailing half of the last axis is viewed as ``(..., n, 3)``, set
        to one through that view on the device, and verified on the host
        using the same view function.
        """
        if _PYPY:
            pytest.xfail("numpypy bug #28")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        n = 2
        result = cl.array.empty(queue, (2, n*6), np.float32)

        def view(z):
            return z[..., n*3:n*6].reshape(z.shape[:-1] + (n, 3))

        result = result.with_queue(queue)
        # ``empty`` leaves the contents undefined, so initialise everything
        # before asserting on values.
        result.fill(0)
        view(result)[0].fill(1)
        view(result)[1].fill(1)
        x = result.get()
        assert (view(x) == 1).all()
    def test_event_management(ctx_factory):
        """Check the bookkeeping of the ``events`` list attached to arrays.

        A freshly produced array carries exactly one event, ``finish()``
        clears the list, and repeated in-place operations must not let the
        list grow without bound.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        x = clrand(queue, (5, 10), dtype=np.float32)
        assert len(x.events) == 1, len(x.events)

        x.finish()

        assert len(x.events) == 0

        # Every arithmetic result starts out with a single event.
        y = x+x
        assert len(y.events) == 1
        y = x*x
        assert len(y.events) == 1
        y = 2*x
        assert len(y.events) == 1

        y = 2/x
        assert len(y.events) == 1
        y = x/2
        assert len(y.events) == 1

        y = x**2
        assert len(y.events) == 1
        y = 2**x
        assert len(y.events) == 1

        # A handful of in-place operations each append one event ...
        for _ in range(10):
            x.fill(0)

        assert len(x.events) == 10

        # ... but the list has to be pruned before it gets large.
        for _ in range(1000):
            x.fill(0)

        assert len(x.events) < 100
    def test_reshape(ctx_factory):
        """Exercise the accepted shape spellings of ``Array.reshape``.

        The target shape is passed as separate integers, as a tuple and as a
        list; ``-1`` stands for a dimension to be inferred.
        """
        queue = cl.CommandQueue(ctx_factory())

        host_ary = np.arange(128).reshape(8, 16).astype(np.float32)
        dev_ary = cl_array.to_device(queue, host_ary)

        # Three equivalent spellings of the same target shape; these only
        # need to be accepted.
        dev_ary.reshape(4, 32)
        dev_ary.reshape((4, 32))
        dev_ary.reshape([4, 32])

        # -1 marks the one dimension to be computed from the total size.
        leading_inferred = dev_ary.reshape(-1, 32)
        assert leading_inferred.shape == (4, 32)

        trailing_inferred = dev_ary.reshape((32, -1))
        assert trailing_inferred.shape == (32, 4)

        middle_inferred = dev_ary.reshape((8, -1, 4))
        assert middle_inferred.shape == (8, 4, 4)
    def test_skip_slicing(ctx_factory):
        """Check that slicing with a step (``a[::3]``) matches NumPy."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        a_host = np.arange(16).reshape((4, 4))
        b_host = a_host[::3]

        a = cl_array.to_device(queue, a_host)
        b = a[::3]
        assert b.shape == b_host.shape
        assert np.array_equal(b[1].get(), b_host[1])  # noqa pylint:disable=unsubscriptable-object
    def test_transpose(ctx_factory):
        """Compare the ``.T`` property of a 3D device array with NumPy's."""
        if _PYPY:
            pytest.xfail("numpypy: no array creation from __array_interface__")

        queue = cl.CommandQueue(ctx_factory())

        from pyopencl.clrandom import rand as clrand

        dev_ary = clrand(queue, (10, 20, 30), dtype=np.float32)
        host_ary = dev_ary.get()

        # FIXME: not contiguous
        #assert np.allclose(
        #        dev_ary.transpose((1,2,0)).get(), host_ary.transpose((1,2,0)))

        transposed_from_device = dev_ary.T.get()
        assert np.array_equal(transposed_from_device, host_ary.T)
    def test_newaxis(ctx_factory):
        """Check shape and strides after indexing with ``np.newaxis``."""
        queue = cl.CommandQueue(ctx_factory())

        from pyopencl.clrandom import rand as clrand

        dev_src = clrand(queue, (10, 20, 30), dtype=np.float32)
        host_src = dev_src.get()

        dev_expanded = dev_src[:, np.newaxis]
        host_expanded = host_src[:, np.newaxis]

        assert dev_expanded.shape == host_expanded.shape

        # Only axes longer than one element are compared.
        for axis, extent in enumerate(host_expanded.shape):
            if extent > 1:
                assert dev_expanded.strides[axis] == host_expanded.strides[axis]
    def test_squeeze(ctx_factory):
        """Check shape and contiguity flags of slices before and after squeeze.

        Two cases are covered: length-1 slices on the two leading axes and a
        length-1 slice on an interior axis.
        """
        queue = cl.CommandQueue(ctx_factory())

        shape = (40, 2, 5, 100)
        n0, n1, n2, n3 = shape
        host_data = np.random.random(size=shape)
        dev_data = cl_array.to_device(queue, host_data)

        # Case 1: length-1 slices on axes 0 and 1.
        leading = dev_data[0:1, 1:2, :, :]
        assert leading.shape == (1, 1, n2, n3)
        assert leading.flags.c_contiguous

        # Squeezing drops both unit axes; the result is C-contiguous.
        leading_squeezed = dev_data[0:1, 1:2, :, :].squeeze()
        assert leading_squeezed.shape == (n2, n3)
        assert leading_squeezed.flags.c_contiguous

        # Check that we get the original values out
        #assert np.all(leading.get().ravel() == leading_squeezed.get().ravel())

        # Case 2: length-1 slice on axis 2.
        interior = dev_data[:, :, 2:3, :]
        assert interior.shape == (n0, n1, 1, n3)
        assert not interior.flags.c_contiguous

        # Squeezing drops the unit axis, but the result is not C-contiguous.
        interior_squeezed = dev_data[:, :, 2:3, :].squeeze()
        assert interior_squeezed.shape == (n0, n1, n3)
        assert not interior_squeezed.flags.c_contiguous

        # Check that we get the original values out
        #assert np.all(interior.get().ravel() == interior_squeezed.get().ravel())
    def test_fancy_fill(ctx_factory):
        """Assign a one-element source through an index array, as in NumPy.

        The one-element source is assigned to three indexed positions of the
        destination, once with NumPy and once on the device; the results
        must match.
        """
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        queue = cl.CommandQueue(ctx_factory())

        # Reference result computed with NumPy.
        expected = np.zeros((4,), np.int32)
        host_indices = np.arange(3, dtype=np.int32)
        host_values = np.arange(8, 9, dtype=np.int32)
        expected[host_indices] = host_values

        # Same assignment carried out on the device.
        dev_dest = cl_array.zeros(queue, (4,), np.int32)
        dev_indices = cl_array.arange(queue, 3, dtype=np.int32)
        dev_values = cl_array.arange(queue, 8, 9, dtype=np.int32)
        dev_dest[dev_indices] = dev_values

        assert np.all(expected == dev_dest.get())
    def test_fancy_indexing(ctx_factory):
        """Check index-array based scatter and gather against NumPy."""
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        n = 2 ** 20 + 2**18 + 22
        numpy_dest = np.zeros(n, dtype=np.int32)
        numpy_idx = np.arange(n, dtype=np.int32)
        numpy_src = 20000+np.arange(n, dtype=np.int32)

        cl_dest = cl_array.to_device(queue, numpy_dest)
        cl_idx = cl_array.to_device(queue, numpy_idx)
        cl_src = cl_array.to_device(queue, numpy_src)

        # Scatter: dest[idx] = src
        numpy_dest[numpy_idx] = numpy_src
        cl_dest[cl_idx] = cl_src

        assert np.array_equal(numpy_dest, cl_dest.get())

        # Gather: dest = src[idx]
        numpy_dest = numpy_src[numpy_idx]
        cl_dest = cl_src[cl_idx]

        assert np.array_equal(numpy_dest, cl_dest.get())

    def test_multi_put(ctx_factory):
        """Check ``multi_put`` scattering several source arrays at once."""
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        n_arrays = 9
        n_indices = 6

        # One source element per index, so no out-of-range source element is
        # read while scattering.
        cl_arrays = [
            cl_array.arange(queue, 0, n_indices, dtype=np.float32)
            for _ in range(n_arrays)
        ]
        idx = cl_array.arange(queue, 0, n_indices, dtype=np.int32)
        out_arrays = [
            cl_array.zeros(queue, (10,), np.float32)
            for _ in range(n_arrays)
        ]

        out_compare = [np.zeros((10,), np.float32) for _ in range(n_arrays)]
        for ary in out_compare:
            ary[idx.get()] = np.arange(0, n_indices, dtype=np.float32)

        cl_array.multi_put(cl_arrays, idx, out=out_arrays)

        # Builtin all(): np.all() applied to a generator object is always
        # true and would not check anything.
        assert all(
            np.array_equal(expected, actual.get())
            for expected, actual in zip(out_compare, out_arrays))
    zachjweiner's avatar
    zachjweiner committed
    def test_get_async(ctx_factory):
        """Check asynchronous device-to-host transfers.

        Covers the deprecated ``get(async_=True)`` spelling, ``get_async()``,
        and ``get_async()`` on an array that has a pending user event.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        device = queue.device
        if device.platform.vendor == "The pocl project" \
                and device.type & cl.device_type.GPU:
            pytest.xfail("the async get test fails on POCL + Nvidia,"
                    "at least the K40, as of pocl 1.6, 2021-01-20")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        b = a + a**5 + 1
        b_gpu = a_gpu + a_gpu**5 + 1

        # deprecated, but still test
        b1 = b_gpu.get(async_=True)  # testing that this waits for events
        # The host buffer may only be inspected once the transfer is done.
        b_gpu.finish()
        assert np.abs(b1 - b).mean() < 1e-5

        b1, evt = b_gpu.get_async()  # testing that this waits for events
        evt.wait()
        assert np.abs(b1 - b).mean() < 1e-5

        # Attach an event that has not completed yet; the transfer must be
        # enqueued without blocking and finish once the event is completed.
        wait_event = cl.UserEvent(context)
        b_gpu.add_event(wait_event)
        b, evt = b_gpu.get_async()  # testing that this doesn't hang
        wait_event.set_status(cl.command_execution_status.COMPLETE)
        evt.wait()
        assert np.abs(b1 - b).mean() < 1e-5
    def test_outoforderqueue_get(ctx_factory):
        """Check that ``get()`` waits for pending work on an out-of-order queue.

        Skipped when such a queue cannot be created.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        b_gpu = a_gpu + a_gpu**5 + 1
        b1 = b_gpu.get()  # testing that this waits for events
        b = a + a**5 + 1
        assert np.abs(b1 - b).mean() < 1e-5
    def test_outoforderqueue_copy(ctx_factory):
        """Check that ``copy()`` is ordered correctly on an out-of-order queue.

        The copy has to wait for the computation of its source and the
        following in-place update has to wait for the copy. Skipped when
        such a queue cannot be created.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        c_gpu = a_gpu**2 - 7
        b_gpu = c_gpu.copy()  # testing that this waits for and creates events
        b_gpu *= 10
        b1 = b_gpu.get()
        b = 10 * (a**2 - 7)
        assert np.abs(b1 - b).mean() < 1e-5
    def test_outoforderqueue_indexing(ctx_factory):
        """Check index-array gather/scatter ordering on an out-of-order queue.

        The same sequence of operations is carried out with NumPy and the
        results are compared. Skipped when such a queue cannot be created.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        # Indices in [8e5, 9e5), so both i - 10000 and i + 10000 stay in range.
        i = (8e5 + 1e5 * np.random.rand(10**5)).astype(np.dtype("int32"))
        a_gpu = cl_array.to_device(queue, a)
        i_gpu = cl_array.to_device(queue, i)
        c_gpu = (a_gpu**2)[i_gpu - 10000]
        b_gpu = 10 - a_gpu
        b_gpu[:] = 8 * a_gpu
        b_gpu[i_gpu + 10000] = c_gpu - 10
        b1 = b_gpu.get()
        c = (a**2)[i - 10000]
        b = 8 * a
        b[i + 10000] = c - 10
        assert np.abs(b1 - b).mean() < 1e-5
    def test_outoforderqueue_reductions(ctx_factory):
        """Check sum, dot and ``all()`` reductions on an out-of-order queue.

        Skipped when such a queue cannot be created.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            pytest.skip("out-of-order queue not available")

        # 0/1 values to avoid accumulated rounding error
        a = (np.random.rand(10**6) > 0.5).astype(np.dtype("float32"))
        a[800000] = 10  # all<5 looks true until near the end
        a_gpu = cl_array.to_device(queue, a)
        b1 = cl_array.sum(a_gpu).get()
        b2 = cl_array.dot(a_gpu, 3 - a_gpu).get()
        b3 = (a_gpu < 5).all().get()
        assert b1 == a.sum() and b2 == a.dot(3 - a) and b3 == 0
    def test_negative_dim_rejection(ctx_factory):
        """Check that shapes with a negative dimension raise ``ValueError``.

        Covers a bare integer, a 1-tuple, and 2-tuples with the negative
        entry in either position next to a negative, zero or positive one.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=-10, dtype=np.float64)

        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=(-10,), dtype=np.float64)

        for left_dim in (-1, 0, 1):
            with pytest.raises(ValueError):
                cl_array.Array(queue, shape=(left_dim, -1), dtype=np.float64)

        for right_dim in (-1, 0, 1):
            with pytest.raises(ValueError):
                cl_array.Array(queue, shape=(-1, right_dim), dtype=np.float64)
    @pytest.mark.parametrize("empty_shape", [0, (), (3, 0, 2), (0, 5), (5, 0)])
    def test_zero_size_array(ctx_factory, empty_shape):
        """Check arithmetic, transfer and reshape for each parametrized shape.

        Contiguity flags, and shape/strides after flattening in C and
        Fortran order, must agree with those of the NumPy array obtained
        from the device.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        if queue.device.platform.name == "Intel(R) OpenCL":
            pytest.xfail("size-0 arrays fail on Intel CL")

        a = cl_array.zeros(queue, empty_shape, dtype=np.float32)
        b = cl_array.zeros(queue, empty_shape, dtype=np.float32)

        c = a + b
        c_host = c.get()
        # Only needs to succeed; the returned array is not inspected.
        cl_array.to_device(queue, c_host)

        assert c.flags.c_contiguous == c_host.flags.c_contiguous
        assert c.flags.f_contiguous == c_host.flags.f_contiguous

        for order in "CF":
            c_flat = c.reshape(-1, order=order)
            c_host_flat = c_host.reshape(-1, order=order)
            assert c_flat.shape == c_host_flat.shape
            assert c_flat.strides == c_host_flat.strides
            assert c_flat.flags.c_contiguous == c_host_flat.flags.c_contiguous
            assert c_flat.flags.f_contiguous == c_host_flat.flags.f_contiguous
    def test_str_without_queue(ctx_factory):
        """Check that ``str()`` and ``repr()`` work on an array without a queue."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        a = cl_array.zeros(queue, 10, dtype=np.float32).with_queue(None)
        # Neither conversion may raise although no queue is attached.
        print(str(a))
        print(repr(a))
    @pytest.mark.parametrize("order", ("F", "C"))
    @pytest.mark.parametrize("input_dims", (1, 2, 3))
    def test_stack(ctx_factory, input_dims, order):
        """Compare ``pyopencl.array.stack`` with ``np.stack``.

        Inputs are 1D to 3D arrays in C or Fortran layout; C-ordered inputs
        are stacked along the first axis, Fortran-ordered ones along the
        last.
        """
        # Replicates the corresponding stack test in pytato's test suite
        # (pytato/test/).
        import pyopencl.array as cla
        cl_ctx = ctx_factory()
        queue = cl.CommandQueue(cl_ctx)

        shape = (2, 2, 2)[:input_dims]
        axis = -1 if order == "F" else 0

        from numpy.random import default_rng
        rng = default_rng()
        x_in = rng.random(size=shape)
        y_in = rng.random(size=shape)
        x_in = x_in if order == "C" else np.asfortranarray(x_in)
        y_in = y_in if order == "C" else np.asfortranarray(y_in)

        x = cla.to_device(queue, x_in)
        y = cla.to_device(queue, y_in)

        np.testing.assert_allclose(cla.stack((x, y), axis=axis).get(),
                                    np.stack((x_in, y_in), axis=axis))
    def test_assign_different_strides(ctx_factory):
        """Check that whole-array assignment into an ``order="F"`` array raises.

        The source comes from ``clrand``; the destination is created with
        ``order="F"``, and ``NotImplementedError`` is expected.
        """
        queue = cl.CommandQueue(ctx_factory())

        from pyopencl.clrandom import rand as clrand

        shape = (20, 30)
        source = clrand(queue, shape, dtype=np.float32)
        fortran_dest = cl_array.empty(queue, shape, dtype=np.float32, order="F")

        with pytest.raises(NotImplementedError):
            fortran_dest[:] = source
    def test_branch_operations_on_pure_scalars(ctx_factory):
        """Check ``maximum``, ``minimum`` and ``if_positive`` on plain scalars.

        No device array is involved; the results must match the NumPy
        counterparts for random scalar inputs.
        """
        x, y = np.random.rand(), np.random.rand()
        cond = np.random.choice([False, True])

        binary_pairs = (
            (np.maximum, cl_array.maximum),
            (np.minimum, cl_array.minimum),
        )
        for numpy_func, cl_func in binary_pairs:
            np.testing.assert_allclose(numpy_func(x, y), cl_func(x, y))

        expected_choice = np.where(cond, x, y)
        np.testing.assert_allclose(expected_choice,
                                   cl_array.if_positive(cond, x, y))
    if __name__ == "__main__":
        # With an argument, run it as a Python statement (e.g. a call to a
        # single test function); otherwise run the whole file under pytest.
        if len(sys.argv) > 1:
            # NOTE: exec() of a command-line argument is a developer
            # convenience only; never feed it untrusted input.
            exec(sys.argv[1])
        else:
            from pytest import main
            main([__file__])
    # vim: filetype=pyopencl:fdm=marker