Skip to content
Snippets Groups Projects
test_array.py 44.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • Andreas Klöckner's avatar
    Andreas Klöckner committed
    
        a_dev = clrand(queue, (5, 15, 20), dtype=np.float32)
        b_dev = clrand(queue, (4, 15, 20), dtype=np.float32)
        c_dev = clrand(queue, (3, 15, 20), dtype=np.float32)
        a = a_dev.get()
        b = b_dev.get()
        c = c_dev.get()
    
        cat_dev = cl.array.concatenate((a_dev, b_dev, c_dev))
        cat = np.concatenate((a, b, c))
    
        assert la.norm(cat - cat_dev.get()) == 0
    
    
    # }}}
    
    
    # {{{ conditionals, any, all
    
    def test_comparisons(ctx_factory):
        """Check device-side comparison operators against their numpy results.

        Each of ``==, !=, <=, <, >=, >`` is exercised array-vs-array,
        array-vs-scalar, scalar-vs-array and scalar-vs-comparison-result.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        ary_len = 20000
        a_dev = clrand(queue, (ary_len,), dtype=np.float32)
        b_dev = clrand(queue, (ary_len,), dtype=np.float32)

        a = a_dev.get()
        b = b_dev.get()

        import operator as o
        for op in [o.eq, o.ne, o.le, o.lt, o.ge, o.gt]:
            # array <op> array
            res_dev = op(a_dev, b_dev)
            res = op(a, b)

            assert (res_dev.get() == res).all()

            # array <op> scalar
            res_dev = op(a_dev, 0)
            res = op(a, 0)

            assert (res_dev.get() == res).all()

            # scalar <op> array
            res_dev = op(0, b_dev)
            res = op(0, b)

            assert (res_dev.get() == res).all()

            # scalar <op> (result of the previous comparison)
            res2_dev = op(0, res_dev)
            res2 = op(0, res)
            assert (res2_dev.get() == res2).all()
    
    
    
    def test_any_all(ctx_factory):
        """Check ``any()``/``all()`` on an all-zero, a partly set and an all-one array."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        ary_len = 20000
        a_dev = cl_array.zeros(queue, (ary_len,), dtype=np.int8)

        # All entries zero: neither reduction may be true.
        assert not a_dev.all().get()
        assert not a_dev.any().get()

        # Exactly one non-zero entry: any() flips, all() must not.
        # Without this write the next two asserts contradict the previous ones.
        a_dev[ary_len // 2] = 1

        assert not a_dev.all().get()
        assert a_dev.any().get()

        # Every entry non-zero: both reductions are true.
        a_dev.fill(1)

        assert a_dev.all().get()
        assert a_dev.any().get()
    
    def test_map_to_host(ctx_factory):
        """Check that writes through a host mapping and writes made on the device
        are both visible from the other side.
        """
        if _PYPY:
            pytest.skip("numpypy: no array creation from __array_interface__")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        if context.devices[0].type & cl.device_type.GPU:
            mf = cl.mem_flags
            allocator = cl_tools.DeferredAllocator(
                    context, mf.READ_WRITE | mf.ALLOC_HOST_PTR)
        else:
            allocator = None

        a_dev = cl_array.zeros(queue, (5, 6, 7,), dtype=np.float32, allocator=allocator)

        # Device-side write made before mapping; this is the value the
        # "HOST[DEV_WRITE]" line below reports from the host copy.
        a_dev[3, 2, 1] = 10

        a_host = a_dev.map_to_host()
        # Host-side write through the mapping, reported as "DEV[HOST_WRITE]".
        a_host[1, 2, 3] = 10

        # Keep a private copy: the mapped view is released right after this.
        a_host_saved = a_host.copy()

        a_host.base.release(queue)

        print("DEV[HOST_WRITE]", a_dev.get()[1, 2, 3])
        print("HOST[DEV_WRITE]", a_host_saved[3, 2, 1])

        assert (a_host_saved == a_dev.get()).all()
    
    
    
    def test_view_and_strides(ctx_factory):
        """Check that ``view()`` of a sliced array keeps its shape and strides.

        A stray unconditional ``return`` used to follow the PyPy guard, which
        turned the whole test into a no-op; it has been removed.
        """
        if _PYPY:
            pytest.xfail("numpypy: no array creation from __array_interface__")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        from pyopencl.clrandom import rand as clrand

        x = clrand(queue, (5, 10), dtype=np.float32)
        y = x[:3, :5]
        yv = y.view()

        assert yv.shape == y.shape
        assert yv.strides == y.strides

        # The element-wise comparison is expected to fail here: the inner
        # assert must raise AssertionError for the test to pass.
        with pytest.raises(AssertionError):
            assert (yv.get() == x.get()[:3, :5]).all()
    
    def test_meshmode_view(ctx_factory):
        """Fill reshaped trailing-slice views of a device array and verify the
        result after copying it back to the host.
        """
        if _PYPY:
            # https://bitbucket.org/pypy/numpy/issue/28/indexerror-on-ellipsis-slice
            pytest.xfail("numpypy bug #28")

        queue = cl.CommandQueue(ctx_factory())

        n = 2
        lo, hi = n*3, n*6

        def view(ary):
            # Last-axis entries [3n, 6n) regrouped as a trailing (n, 3) block.
            regrouped_shape = ary.shape[:-1] + (n, 3)
            return ary[..., lo:hi].reshape(regrouped_shape)

        result = cl.array.empty(queue, (2, hi), np.float32).with_queue(queue)
        result.fill(0)
        for row in (0, 1):
            view(result)[row].fill(1)

        host_result = result.get()
        assert (view(host_result) == 1).all()
    
    
    
    def test_event_management(ctx_factory):
        """Check how many events an array carries after creation, after
        ``finish()``, after arithmetic, and after many repeated fills.
        """
        from pyopencl.clrandom import rand as clrand

        queue = cl.CommandQueue(ctx_factory())

        x = clrand(queue, (5, 10), dtype=np.float32)

        n_initial = len(x.events)
        assert n_initial == 1, n_initial

        x.finish()

        assert len(x.events) == 0

        # Every arithmetic result is expected to carry exactly one event.
        operations = (
            lambda ary: ary+ary,
            lambda ary: ary*ary,
            lambda ary: 2*ary,
            lambda ary: 2/ary,
            lambda ary: ary/2,
            lambda ary: ary**2,
            lambda ary: 2**ary,
        )
        for apply_op in operations:
            y = apply_op(x)
            assert len(y.events) == 1

        # Ten fills leave ten events on the array ...
        for _ in range(10):
            x.fill(0)

        assert len(x.events) == 10

        # ... but after a thousand more the event list must stay below 100.
        for _ in range(1000):
            x.fill(0)

        assert len(x.events) < 100
    
    
    
    def test_reshape(ctx_factory):
        """Exercise the accepted shape spellings of ``reshape`` and ``-1`` inference."""
        queue = cl.CommandQueue(ctx_factory())

        host_ary = np.arange(128).reshape(8, 16).astype(np.float32)
        dev_ary = cl_array.to_device(queue, host_ary)

        # The target shape may be passed as separate arguments, a tuple or a list.
        for shape_args in ((4, 32), ((4, 32),), ([4, 32],)):
            dev_ary.reshape(*shape_args)

        # A -1 entry stands for the dimension left to be inferred.
        inferred_cases = (
            ((-1, 32), (4, 32)),
            (((32, -1),), (32, 4)),
            (((8, -1, 4),), (8, 4, 4)),
        )
        for shape_args, expected_shape in inferred_cases:
            assert dev_ary.reshape(*shape_args).shape == expected_shape
    
    def test_skip_slicing(ctx_factory):
        """Check that a strided slice (``[::3]``) of a device array matches numpy."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        a_host = np.arange(16).reshape((4, 4))
        b_host = a_host[::3]

        a = cl_array.to_device(queue, a_host)
        b = a[::3]
        assert b.shape == b_host.shape
        assert np.array_equal(b[1].get(), b_host[1])  # noqa pylint:disable=unsubscriptable-object
    
    def test_transpose(ctx_factory):
        """Compare ``.T`` of a random 3D device array with numpy's ``.T``."""
        if _PYPY:
            pytest.xfail("numpypy: no array creation from __array_interface__")

        queue = cl.CommandQueue(ctx_factory())

        from pyopencl.clrandom import rand as clrand

        dev_ary = clrand(queue, (10, 20, 30), dtype=np.float32)
        host_ary = dev_ary.get()

        # FIXME: not contiguous -- the general permutation transpose((1, 2, 0))
        # is therefore not compared against numpy here.
        transposed = dev_ary.T.get()
        assert np.array_equal(transposed, host_ary.T)
    
    
    def test_newaxis(ctx_factory):
        """Insert an axis with ``np.newaxis`` and compare shape and strides with numpy."""
        from pyopencl.clrandom import rand as clrand

        queue = cl.CommandQueue(ctx_factory())

        dev_ary = clrand(queue, (10, 20, 30), dtype=np.float32)
        host_ary = dev_ary.get()

        dev_expanded = dev_ary[:, np.newaxis]
        host_expanded = host_ary[:, np.newaxis]

        assert dev_expanded.shape == host_expanded.shape

        # Strides are only compared on axes longer than one element.
        for axis, extent in enumerate(host_expanded.shape):
            if extent <= 1:
                continue
            assert dev_expanded.strides[axis] == host_expanded.strides[axis]
    
    def test_squeeze(ctx_factory):
        """Check shape and C-contiguity of length-1 slices before and after ``squeeze()``."""
        queue = cl.CommandQueue(ctx_factory())

        shape = (40, 2, 5, 100)
        n0, n1, n2, n3 = shape
        host_ary = np.random.random(size=shape)
        dev_ary = cl_array.to_device(queue, host_ary)

        # Case 1: length-1 slices along axes 0 and 1.
        outer_slice = dev_ary[0:1, 1:2, :, :]
        assert outer_slice.shape == (1, 1, n2, n3)
        assert outer_slice.flags.c_contiguous

        # Squeezing drops both unit axes; the result is still contiguous.
        outer_squeezed = dev_ary[0:1, 1:2, :, :].squeeze()
        assert outer_squeezed.shape == (n2, n3)
        assert outer_squeezed.flags.c_contiguous

        # Value comparison between slice and squeezed slice is currently disabled:
        #assert np.all(a_gpu_slice.get().ravel() == a_gpu_squeezed_slice.get().ravel())

        # Case 2: length-1 slice along axis 2.
        inner_slice = dev_ary[:, :, 2:3, :]
        assert inner_slice.shape == (n0, n1, 1, n3)
        assert not inner_slice.flags.c_contiguous

        # Squeezing drops the unit axis, but the result is not contiguous.
        inner_squeezed = dev_ary[:, :, 2:3, :].squeeze()
        assert inner_squeezed.shape == (n0, n1, n3)
        assert not inner_squeezed.flags.c_contiguous

        # Value comparison between slice and squeezed slice is currently disabled:
        #assert np.all(a_gpu_slice.get().ravel() == a_gpu_squeezed_slice.get().ravel())
    
    
    
    def test_fancy_fill(ctx_factory):
        """Assign a one-element source through an index array and compare with numpy."""
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        queue = cl.CommandQueue(ctx_factory())

        # Host reference: the single source value lands at indices 0, 1 and 2.
        expected = np.zeros((4,), np.int32)
        host_idx = np.arange(3, dtype=np.int32)
        host_src = np.arange(8, 9, dtype=np.int32)
        expected[host_idx] = host_src

        # The same assignment carried out on the device.
        dev_dest = cl_array.zeros(queue, (4,), np.int32)
        dev_idx = cl_array.arange(queue, 3, dtype=np.int32)
        dev_src = cl_array.arange(queue, 8, 9, dtype=np.int32)
        dev_dest[dev_idx] = dev_src

        assert np.all(expected == dev_dest.get())
    
    
    def test_fancy_indexing(ctx_factory):
        """Scatter and gather through a shuffled device index array must match numpy."""
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        n = 2 ** 20 + 2**18 + 22
        numpy_dest = np.zeros(n, dtype=np.int32)
        numpy_idx = np.arange(n, dtype=np.int32)
        np.random.shuffle(numpy_idx)
        numpy_src = 20000+np.arange(n, dtype=np.int32)

        cl_dest = cl_array.to_device(queue, numpy_dest)
        cl_idx = cl_array.to_device(queue, numpy_idx)
        cl_src = cl_array.to_device(queue, numpy_src)

        # Scatter: dest[idx] = src
        numpy_dest[numpy_idx] = numpy_src
        cl_dest[cl_idx] = cl_src

        assert np.array_equal(numpy_dest, cl_dest.get())

        # Gather: dest = src[idx]
        numpy_dest = numpy_src[numpy_idx]
        cl_dest = cl_src[cl_idx]

        assert np.array_equal(numpy_dest, cl_dest.get())


    def test_multi_put(ctx_factory):
        """Check ``cl_array.multi_put`` on several arrays against a numpy reference.

        This code used to be fused into ``test_fancy_indexing`` because its
        ``def`` line was missing; it is a separate test again.
        """
        if _PYPY:
            pytest.xfail("numpypy: multi value setting is not supported")

        context = ctx_factory()
        queue = cl.CommandQueue(context)

        n_arrays = 9
        n_values = 6

        # One source value per destination index. The sources used to hold only
        # three values while six indices were written.
        # NOTE(review): assumes multi_put stores arrays[k][j] at
        # out[k][dest_indices[j]] -- confirm against the pyopencl documentation.
        cl_arrays = [
            cl_array.arange(queue, 0, n_values, dtype=np.float32)
            for _ in range(n_arrays)
        ]
        idx = cl_array.arange(queue, 0, n_values, dtype=np.int32)
        out_arrays = [
            cl_array.zeros(queue, (10,), np.float32)
            for _ in range(n_arrays)
        ]

        idx_host = idx.get()  # fetched once instead of once per array
        out_compare = [np.zeros((10,), np.float32) for _ in range(n_arrays)]
        for ary in out_compare:
            ary[idx_host] = np.arange(0, n_values, dtype=np.float32)

        cl_array.multi_put(cl_arrays, idx, out=out_arrays)

        # Builtin all(): np.all() applied to a generator object is always
        # truthy, which made the previous assertion unable to fail.
        assert all(
            np.array_equal(expected, actual.get())
            for expected, actual in zip(out_compare, out_arrays)
        )
    
    def test_get_async(ctx_factory):
        """Check that asynchronous downloads wait for pending events, and do not
        hang while a user event is still outstanding.
        """
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        device = queue.device
        if device.platform.vendor == "The pocl project" \
                and device.type & cl.device_type.GPU:
            pytest.xfail("the async get test fails on POCL + Nvidia, "
                    "at least the K40, as of pocl 1.6, 2021-01-20")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        b = a + a**5 + 1  # host reference for every check below
        b_gpu = a_gpu + a_gpu**5 + 1

        # deprecated, but still test
        b1 = b_gpu.get(async_=True)  # testing that this waits for events
        b_gpu.finish()
        assert np.abs(b1 - b).mean() < 1e-5

        b1, evt = b_gpu.get_async()  # testing that this waits for events
        evt.wait()
        assert np.abs(b1 - b).mean() < 1e-5

        wait_event = cl.UserEvent(context)
        b_gpu.add_event(wait_event)
        # Stored under a new name so the host reference ``b`` is not overwritten;
        # otherwise the final assert would only compare two device downloads.
        b2, evt = b_gpu.get_async()  # testing that this doesn't hang
        wait_event.set_status(cl.command_execution_status.COMPLETE)
        evt.wait()
        assert np.abs(b2 - b).mean() < 1e-5
    
    
    
    def test_outoforderqueue_get(ctx_factory):
        """On an out-of-order queue, ``get()`` must wait for the pending computation."""
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            # Any failure to create the queue is treated as "not supported".
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        b_gpu = a_gpu + a_gpu**5 + 1
        b1 = b_gpu.get()  # testing that this waits for events
        b = a + a**5 + 1
        assert np.abs(b1 - b).mean() < 1e-5
    
    
    def test_outoforderqueue_copy(ctx_factory):
        """On an out-of-order queue, ``copy()`` must wait for its source and be
        waited on by later operations on the copy.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            # Any failure to create the queue is treated as "not supported".
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        a_gpu = cl_array.to_device(queue, a)
        c_gpu = a_gpu**2 - 7
        b_gpu = c_gpu.copy()  # testing that this waits for and creates events
        b_gpu *= 10
        queue.finish()
        b1 = b_gpu.get()
        b = 10 * (a**2 - 7)
        assert np.abs(b1 - b).mean() < 1e-5
    
    def test_outoforderqueue_indexing(ctx_factory):
        """On an out-of-order queue, chained indexed reads and writes must give
        the same result as the equivalent numpy sequence.
        """
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            # Any failure to create the queue is treated as "not supported".
            pytest.skip("out-of-order queue not available")

        a = np.random.rand(10**6).astype(np.dtype("float32"))
        i = (8e5 + 1e5 * np.random.rand(10**5)).astype(np.dtype("int32"))

        # Device side.
        a_gpu = cl_array.to_device(queue, a)
        i_gpu = cl_array.to_device(queue, i)
        c_gpu = (a_gpu**2)[i_gpu - 10000]
        b_gpu = 10 - a_gpu
        b_gpu[:] = 8 * a_gpu
        b_gpu[i_gpu + 10000] = c_gpu - 10
        queue.finish()
        b1 = b_gpu.get()

        # Host reference.
        c = (a**2)[i - 10000]
        b = 8 * a
        b[i + 10000] = c - 10
        assert np.abs(b1 - b).mean() < 1e-5
    
    
    def test_outoforderqueue_reductions(ctx_factory):
        """On an out-of-order queue, ``sum``, ``dot`` and ``all`` must match numpy."""
        context = ctx_factory()
        try:
            queue = cl.CommandQueue(context,
                   properties=cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)
        except Exception:
            # Any failure to create the queue is treated as "not supported".
            pytest.skip("out-of-order queue not available")

        # 0/1 values to avoid accumulated rounding error
        a = (np.random.rand(10**6) > 0.5).astype(np.dtype("float32"))
        a[800000] = 10  # all<5 looks true until near the end

        a_gpu = cl_array.to_device(queue, a)
        b1 = cl_array.sum(a_gpu).get()
        b2 = cl_array.dot(a_gpu, 3 - a_gpu).get()
        b3 = (a_gpu < 5).all().get()

        # Separate asserts so a failure names the reduction that went wrong.
        assert b1 == a.sum()
        assert b2 == a.dot(3 - a)
        assert b3 == 0
    
    
    def test_negative_dim_rejection(ctx_factory):
        """Constructing an ``Array`` with a negative dimension must raise ``ValueError``."""
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        # Scalar shape.
        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=-10, dtype=np.float64)

        # One-element tuple shape.
        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=(-10,), dtype=np.float64)

        # Negative trailing dimension next to a negative, zero or positive one.
        for left_dim in (-1, 0, 1):
            with pytest.raises(ValueError):
                cl_array.Array(queue, shape=(left_dim, -1), dtype=np.float64)

        # Negative leading dimension next to a negative, zero or positive one.
        for right_dim in (-1, 0, 1):
            with pytest.raises(ValueError):
                cl_array.Array(queue, shape=(-1, right_dim), dtype=np.float64)
    
    @pytest.mark.parametrize("empty_shape", [0, (), (3, 0, 2), (0, 5), (5, 0)])
    def test_zero_size_array(ctx_factory, empty_shape):
        """Run basic operations on arrays of the parametrized shapes and compare
        contiguity flags, shape and strides with the numpy copy.
        """
        queue = cl.CommandQueue(ctx_factory())

        if queue.device.platform.name == "Intel(R) OpenCL":
            pytest.xfail("size-0 arrays fail on Intel CL")

        zeros_dev = cl_array.zeros(queue, empty_shape, dtype=np.float32)
        ones_dev = cl_array.zeros(queue, empty_shape, dtype=np.float32)
        ones_dev.fill(1)
        total_dev = zeros_dev + ones_dev
        total_host = total_dev.get()
        # The upload only has to succeed; its result is not inspected.
        cl_array.to_device(queue, total_host)

        contiguity_flags = ("c_contiguous", "f_contiguous")

        for flag in contiguity_flags:
            assert getattr(total_dev.flags, flag) == getattr(total_host.flags, flag)

        for order in "CF":
            flat_dev = total_dev.reshape(-1, order=order)
            flat_host = total_host.reshape(-1, order=order)
            assert flat_dev.shape == flat_host.shape
            assert flat_dev.strides == flat_host.strides
            for flag in contiguity_flags:
                assert getattr(flat_dev.flags, flag) == getattr(flat_host.flags, flag)
    
    
    def test_str_without_queue(ctx_factory):
        """``str()`` and ``repr()`` must work on an array detached from its queue."""
        queue = cl.CommandQueue(ctx_factory())

        detached = cl_array.zeros(queue, 10, dtype=np.float32).with_queue(None)
        for render in (str, repr):
            print(render(detached))
    
    
    
    @pytest.mark.parametrize("order", ("F", "C"))
    @pytest.mark.parametrize("input_dims", (1, 2, 3))
    def test_stack(ctx_factory, input_dims, order):
        """Compare ``pyopencl.array.stack`` with ``np.stack`` for 1D-3D inputs in
        C and Fortran order.
        """
        # Replicates pytato/test/test_codegen.py::test_stack
        import pyopencl.array as cla
        cl_ctx = ctx_factory()
        queue = cl.CommandQueue(cl_ctx)

        shape = (2, 2, 2)[:input_dims]
        # Fortran-ordered inputs are stacked along the last axis, C-ordered
        # inputs along the first.
        axis = -1 if order == "F" else 0

        from numpy.random import default_rng
        rng = default_rng()
        x_in = rng.random(size=shape)
        y_in = rng.random(size=shape)
        x_in = x_in if order == "C" else np.asfortranarray(x_in)
        y_in = y_in if order == "C" else np.asfortranarray(y_in)

        x = cla.to_device(queue, x_in)
        y = cla.to_device(queue, y_in)

        np.testing.assert_allclose(cla.stack((x, y), axis=axis).get(),
                                    np.stack((x_in, y_in), axis=axis))
    
    def test_assign_different_strides(ctx_factory):
        """Whole-array assignment into an ``order="F"`` array is expected to raise
        ``NotImplementedError``.
        """
        from pyopencl.clrandom import rand as clrand

        queue = cl.CommandQueue(ctx_factory())
        shape = (20, 30)

        source = clrand(queue, shape, dtype=np.float32)
        fortran_dest = cl_array.empty(queue, shape, dtype=np.float32, order="F")
        with pytest.raises(NotImplementedError):
            fortran_dest[:] = source
    
    
    
    def test_branch_operations_on_pure_scalars(ctx_factory):
        """``maximum``, ``minimum`` and ``if_positive`` on plain scalars must agree
        with their numpy counterparts.
        """
        x = np.random.rand()
        y = np.random.rand()
        cond = np.random.choice([False, True])

        # (numpy reference, cl_array function under test, shared arguments)
        cases = (
            (np.maximum, cl_array.maximum, (x, y)),
            (np.minimum, cl_array.minimum, (x, y)),
            (np.where, cl_array.if_positive, (cond, x, y)),
        )
        for reference_fn, tested_fn, args in cases:
            np.testing.assert_allclose(reference_fn(*args), tested_fn(*args))
    
    
    
    if __name__ == "__main__":
        if len(sys.argv) > 1:
            # NOTE(review): runs the first command-line argument as Python code.
            # Intended for local, trusted developer use only.
            exec(sys.argv[1])
        else:
            # No argument given: run the whole file under pytest. The missing
            # "else" used to make this branch run only after exec(), and never
            # on its own.
            from pytest import main
            main([__file__])

    # vim: filetype=pyopencl:fdm=marker