diff --git a/pycuda/tools.py b/pycuda/tools.py
index 05ac3c52ec3e2e998c6e9be4337d1b8dfa1f04df..a92883c922a6c87ce4a7a10e2c0cd6d3cb0dc4c2 100644
--- a/pycuda/tools.py
+++ b/pycuda/tools.py
@@ -527,6 +527,20 @@ def mark_cuda_test(inner_f):
     return mark_test.cuda(f)
 
 
+def init_cuda_context_fixture():
+    import pycuda.driver as cuda
+    cuda.init()
+    ctx = make_default_context()
+    assert isinstance(ctx.get_device().name(), str)
+    assert isinstance(ctx.get_device().compute_capability(), tuple)
+    assert isinstance(ctx.get_device().get_attributes(), dict)
+    yield
+
+    from gc import collect
+    ctx.pop()
+    clear_context_caches()
+    collect()
+
 # }}}
 
 
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index ddd8b2f6eb0fac4a47795336eec35affdd4a0699..c2251b964e07116ed15b82d7da95d55f812c9223 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -3,17 +3,23 @@
 import numpy as np
 import numpy.linalg as la
 import sys
 
-from pycuda.tools import mark_cuda_test
+from pycuda.tools import init_cuda_context_fixture
 from pycuda.characterize import has_double_support
 
 import pycuda.gpuarray as gpuarray
 import pycuda.driver as drv
 from pycuda.compiler import SourceModule
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def init_cuda_context():
+    yield from init_cuda_context_fixture()
 
 
+@pytest.mark.cuda
 class TestGPUArray:
-    @mark_cuda_test
     def test_pow_array(self):
         a = np.array([1, 2, 3, 4, 5]).astype(np.float32)
         a_gpu = gpuarray.to_gpu(a)
@@ -30,9 +36,9 @@ class TestGPUArray:
         a_gpu = a_gpu.get()
         np.testing.assert_allclose(pow(a, b), a_gpu, rtol=1e-6)
 
-    @mark_cuda_test
-    def test_pow_number(self):
-        a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
+    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+    def test_pow_number(self, dtype):
+        a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(dtype)
         a_gpu = gpuarray.to_gpu(a)
 
         result = pow(a_gpu, 2).get()
@@ -42,7 +48,6 @@ class TestGPUArray:
         a_gpu = a_gpu.get()
         np.testing.assert_allclose(a ** 2, a_gpu, rtol=1e-6)
 
-    @mark_cuda_test
     def test_rpow_array(self):
         scalar = np.random.rand()
         a = abs(np.random.rand(10))
@@ -57,18 +62,15 @@ class TestGPUArray:
         result = (a_gpu ** scalar).get()
         np.testing.assert_allclose(a ** scalar, result)
 
-    @mark_cuda_test
     def test_numpy_integer_shape(self):
         gpuarray.empty(np.int32(17), np.float32)
         gpuarray.empty((np.int32(17), np.int32(17)), np.float32)
 
-    @mark_cuda_test
     def test_ndarray_shape(self):
         gpuarray.empty(np.array(3), np.float32)
         gpuarray.empty(np.array([3]), np.float32)
         gpuarray.empty(np.array([2, 3]), np.float32)
 
-    @mark_cuda_test
     def test_abs(self):
         a = -gpuarray.arange(111, dtype=np.float32)
         res = a.get()
@@ -84,13 +86,11 @@ class TestGPUArray:
             assert abs(res[i]) >= 0
             assert res[i] == i
 
-    @mark_cuda_test
     def test_len(self):
         a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
         a_cpu = gpuarray.to_gpu(a)
         assert len(a_cpu) == 10
 
-    @mark_cuda_test
     def test_multiply(self):
         """Test the muliplication of an array with a scalar.
         """
@@ -103,7 +103,6 @@ class TestGPUArray:
 
         assert (a * scalar == a_doubled).all()
 
-    @mark_cuda_test
     def test_rmul_yields_right_type(self):
         a = np.array([1, 2, 3, 4, 5]).astype(np.float32)
         a_gpu = gpuarray.to_gpu(a)
@@ -114,7 +113,6 @@ class TestGPUArray:
         two_a = np.float32(2) * a_gpu
         assert isinstance(two_a, gpuarray.GPUArray)
 
-    @mark_cuda_test
     def test_multiply_array(self):
         """Test the multiplication of two arrays."""
 
@@ -127,7 +125,6 @@ class TestGPUArray:
 
         assert (a * a == a_squared).all()
 
-    @mark_cuda_test
     def test_addition_array(self):
         """Test the addition of two arrays."""
 
@@ -137,7 +134,6 @@ class TestGPUArray:
 
         assert (a + a == a_added).all()
 
-    @mark_cuda_test
     def test_iaddition_array(self):
         """Test the inplace addition of two arrays."""
 
@@ -148,7 +144,6 @@ class TestGPUArray:
 
         assert (a + a == a_added).all()
 
-    @mark_cuda_test
     def test_addition_scalar(self):
         """Test the addition of an array and a scalar."""
 
@@ -158,7 +153,6 @@ class TestGPUArray:
 
         assert (7 + a == a_added).all()
 
-    @mark_cuda_test
     def test_iaddition_scalar(self):
         """Test the inplace addition of an array and a scalar."""
 
@@ -169,7 +163,6 @@ class TestGPUArray:
 
         assert (7 + a == a_added).all()
 
-    @mark_cuda_test
     def test_substract_array(self):
         """Test the subtraction of two arrays."""
         # test data
@@ -185,7 +178,6 @@ class TestGPUArray:
         result = (b_gpu - a_gpu).get()
         assert (b - a == result).all()
 
-    @mark_cuda_test
     def test_substract_scalar(self):
         """Test the subtraction of an array and a scalar."""
 
@@ -201,7 +193,6 @@ class TestGPUArray:
         result = (7 - a_gpu).get()
         assert (7 - a == result).all()
 
-    @mark_cuda_test
     def test_divide_scalar(self):
         """Test the division of an array and a scalar."""
 
@@ -214,7 +205,6 @@ class TestGPUArray:
         result = (2 / a_gpu).get()
         assert (2 / a == result).all()
 
-    @mark_cuda_test
     def test_divide_array(self):
         """Test the division of an array and a scalar.
         """
@@ -231,7 +221,6 @@ class TestGPUArray:
         a_divide = (b_gpu / a_gpu).get()
         assert (np.abs(b / a - a_divide) < 1e-3).all()
 
-    @mark_cuda_test
     def test_random(self):
         from pycuda.curandom import rand as curand
 
@@ -246,7 +235,6 @@ class TestGPUArray:
             assert (0 <= a).all()
             assert (a < 1).all()
 
-    @mark_cuda_test
     def test_curand_wrappers(self):
         from pycuda.curandom import get_curand_version
 
@@ -321,7 +309,6 @@ class TestGPUArray:
         # # Compare with scipy.stats.poisson.pmf(v - 1, v)
         # assert np.isclose(0.12511, tmp, atol=0.002)
 
-    @mark_cuda_test
     def test_array_gt(self):
         """Test whether array contents are > the other array's contents"""
 
@@ -334,7 +321,6 @@ class TestGPUArray:
         assert result[0]
         assert not result[1]
 
-    @mark_cuda_test
     def test_array_lt(self):
         """Test whether array contents are < the other array's contents"""
 
@@ -347,7 +333,6 @@ class TestGPUArray:
         assert result[0]
         assert not result[1]
 
-    @mark_cuda_test
     def test_array_le(self):
         """Test whether array contents are <= the other array's contents"""
 
@@ -361,7 +346,6 @@ class TestGPUArray:
         assert result[1]
         assert not result[2]
 
-    @mark_cuda_test
     def test_array_ge(self):
         """Test whether array contents are >= the other array's contents"""
 
@@ -375,7 +359,6 @@ class TestGPUArray:
         assert result[1]
         assert not result[2]
 
-    @mark_cuda_test
     def test_array_eq(self):
         """Test whether array contents are == the other array's contents"""
 
@@ -388,7 +371,6 @@ class TestGPUArray:
         assert not result[0]
         assert result[1]
 
-    @mark_cuda_test
     def test_array_ne(self):
         """Test whether array contents are != the other array's contents"""
 
@@ -401,7 +383,6 @@ class TestGPUArray:
         assert result[0]
         assert not result[1]
 
-    @mark_cuda_test
     def test_nan_arithmetic(self):
         def make_nan_contaminated_vector(size):
             shape = (size,)
@@ -426,7 +407,6 @@ class TestGPUArray:
 
         assert (np.isnan(ab) == np.isnan(ab_gpu)).all()
 
-    @mark_cuda_test
     def test_elwise_kernel(self):
         from pycuda.curandom import rand as curand
 
@@ -446,7 +426,6 @@ class TestGPUArray:
 
         assert la.norm((c_gpu - (5 * a_gpu + 6 * b_gpu)).get()) < 1e-5
 
-    @mark_cuda_test
     def test_ranged_elwise_kernel(self):
         from pycuda.elementwise import ElementwiseKernel
 
@@ -470,7 +449,6 @@ class TestGPUArray:
 
             assert la.norm(a_cpu - a_gpu.get()) == 0, i
 
-    @mark_cuda_test
     def test_take(self):
         idx = gpuarray.arange(0, 10000, 2, dtype=np.uint32)
         for dtype in [np.float32, np.complex64]:
@@ -480,12 +458,10 @@ class TestGPUArray:
 
             assert (a_host[idx.get()] == result.get()).all()
 
-    @mark_cuda_test
     def test_arange(self):
         a = gpuarray.arange(12, dtype=np.float32)
         assert (np.arange(12, dtype=np.float32) == a.get()).all()
 
-    @mark_cuda_test
     def test_ones(self):
 
         ones = np.ones(10)
@@ -494,35 +470,30 @@ class TestGPUArray:
         np.testing.assert_allclose(ones, ones_gpu.get(), rtol=1e-6)
         assert ones.dtype == ones_gpu.dtype
 
-    @mark_cuda_test
-    def test_stack(self):
+    @pytest.mark.parametrize("order", ["F", "C"])
+    @pytest.mark.parametrize("input_dims", [0, 1, 2])
+    def test_stack(self, order, input_dims):
 
-        orders = ["F", "C"]
-        input_dims_lst = [0, 1, 2]
+        shape = (2, 2, 2)[:input_dims]
+        axis = -1 if order == "F" else 0
 
-        for order in orders:
-            for input_dims in input_dims_lst:
-                shape = (2, 2, 2)[:input_dims]
-                axis = -1 if order == "F" else 0
+        from numpy.random import default_rng
+        rng = default_rng()
+        x_in = rng.random(size=shape)
+        y_in = rng.random(size=shape)
+        x_in = x_in if order == "C" else np.asfortranarray(x_in)
+        y_in = y_in if order == "C" else np.asfortranarray(y_in)
 
-                from numpy.random import default_rng
-                rng = default_rng()
-                x_in = rng.random(size=shape)
-                y_in = rng.random(size=shape)
-                x_in = x_in if order == "C" else np.asfortranarray(x_in)
-                y_in = y_in if order == "C" else np.asfortranarray(y_in)
+        x_gpu = gpuarray.to_gpu(x_in)
+        y_gpu = gpuarray.to_gpu(y_in)
 
-                x_gpu = gpuarray.to_gpu(x_in)
-                y_gpu = gpuarray.to_gpu(y_in)
+        numpy_stack = np.stack((x_in, y_in), axis=axis)
+        gpuarray_stack = gpuarray.stack((x_gpu, y_gpu), axis=axis)
 
-                numpy_stack = np.stack((x_in, y_in), axis=axis)
-                gpuarray_stack = gpuarray.stack((x_gpu, y_gpu), axis=axis)
+        np.testing.assert_allclose(gpuarray_stack.get(), numpy_stack)
 
-                np.testing.assert_allclose(gpuarray_stack.get(), numpy_stack)
+        assert gpuarray_stack.shape == numpy_stack.shape
 
-                assert gpuarray_stack.shape == numpy_stack.shape
-
-    @mark_cuda_test
     def test_concatenate(self):
         from pycuda.curandom import rand as curand
 
@@ -541,7 +512,6 @@ class TestGPUArray:
 
         assert cat.shape == cat_dev.shape
 
-    @mark_cuda_test
     def test_reverse(self):
         a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
         a_cpu = gpuarray.to_gpu(a)
@@ -553,7 +523,6 @@ class TestGPUArray:
         for i in range(0, 10):
             assert a[len(a) - 1 - i] == b[i]
 
-    @mark_cuda_test
     def test_sum(self):
         from pycuda.curandom import rand as curand
 
@@ -566,7 +535,6 @@ class TestGPUArray:
 
         assert abs(sum_a_gpu - sum_a) / abs(sum_a) < 1e-4
 
-    @mark_cuda_test
     def test_minmax(self):
         from pycuda.curandom import rand as curand
 
@@ -585,7 +553,6 @@ class TestGPUArray:
 
                 assert op_a_gpu == op_a, (op_a_gpu, op_a, dtype, what)
 
-    @mark_cuda_test
     def test_subset_minmax(self):
         from pycuda.curandom import rand as curand
 
@@ -619,41 +586,38 @@ class TestGPUArray:
 
         assert min_a_gpu == min_a
 
-    @mark_cuda_test
-    def test_dot(self):
+    @pytest.mark.parametrize("sz", [2,
+                                    3,
+                                    4,
+                                    5,
+                                    6,
+                                    7,
+                                    31,
+                                    32,
+                                    33,
+                                    127,
+                                    128,
+                                    129,
+                                    255,
+                                    256,
+                                    257,
+                                    16384 - 993,
+                                    20000,
+                                    ])
+    def test_dot(self, sz):
         from pycuda.curandom import rand as curand
 
-        for sz in [
-            2,
-            3,
-            4,
-            5,
-            6,
-            7,
-            31,
-            32,
-            33,
-            127,
-            128,
-            129,
-            255,
-            256,
-            257,
-            16384 - 993,
-            20000,
-        ]:
-            a_gpu = curand((sz,))
-            a = a_gpu.get()
-            b_gpu = curand((sz,))
-            b = b_gpu.get()
+        a_gpu = curand((sz,))
+        a = a_gpu.get()
+        b_gpu = curand((sz,))
+        b = b_gpu.get()
 
-            dot_ab = np.dot(a, b)
+        dot_ab = np.dot(a, b)
 
-            dot_ab_gpu = gpuarray.dot(a_gpu, b_gpu).get()
+        dot_ab_gpu = gpuarray.dot(a_gpu, b_gpu).get()
 
-            assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4
+        assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4
 
-    @mark_cuda_test
     def test_slice(self):
         from pycuda.curandom import rand as curand
 
@@ -672,7 +636,6 @@ class TestGPUArray:
 
         assert la.norm(a_gpu_slice.get() - a_slice) == 0
 
-    @mark_cuda_test
     def test_2d_slice_c(self):
         from pycuda.curandom import rand as curand
 
@@ -692,7 +655,6 @@ class TestGPUArray:
 
         assert la.norm(a_gpu_slice.get() - a_slice) == 0
 
-    @mark_cuda_test
     def test_2d_slice_f(self):
         from pycuda.curandom import rand as curand
         import pycuda.gpuarray as gpuarray
@@ -716,7 +678,6 @@ class TestGPUArray:
 
         assert la.norm(a_gpu_slice.get() - a_slice) == 0
 
-    @mark_cuda_test
     def test_if_positive(self):
         from pycuda.curandom import rand as curand
 
@@ -737,7 +698,6 @@ class TestGPUArray:
         assert la.norm(max_a_b_gpu.get() - np.maximum(a, b)) == 0
         assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0
 
-    @mark_cuda_test
     def test_take_put(self):
         for n in [5, 17, 333]:
             one_field_size = 8
@@ -759,7 +719,6 @@ class TestGPUArray:
 
             drv.Context.synchronize()
 
-    @mark_cuda_test
     def test_astype(self):
         from pycuda.curandom import rand as curand
 
@@ -782,7 +741,6 @@ class TestGPUArray:
         assert a2.dtype == np.float32
         assert la.norm(a - a2) / la.norm(a) < 1e-7
 
-    @mark_cuda_test
     def test_complex_bits(self):
         from pycuda.curandom import rand as curand
 
@@ -827,7 +785,6 @@ class TestGPUArray:
         assert zdata.imag.flags.f_contiguous
         assert zdata.conj().flags.f_contiguous
 
-    @mark_cuda_test
     def test_pass_slice_to_kernel(self):
         mod = SourceModule(
             """
@@ -848,9 +805,9 @@ class TestGPUArray:
         a = a_gpu.get()
 
         assert (a[255:257] == np.array([1, 2], np.float32)).all()
-        assert (a[255 * 256 - 1: 255 * 256 + 1] == np.array([2, 1], np.float32)).all()
+        np.testing.assert_array_equal(a[255 * 256 - 1: 255 * 256 + 1],
+                                      np.array([2, 1], np.float32))
 
-    @mark_cuda_test
     def test_scan(self):
         from pycuda.scan import ExclusiveScanKernel, InclusiveScanKernel
 
@@ -879,7 +836,6 @@ class TestGPUArray:
 
             assert (gpu_data.get() == desired_result).all()
 
-    @mark_cuda_test
     def test_stride_preservation(self):
         A = np.random.rand(3, 3)
         AT = A.T
@@ -888,18 +844,15 @@ class TestGPUArray:
         print((AT_GPU.flags.f_contiguous, AT_GPU.flags.c_contiguous))
         assert np.allclose(AT_GPU.get(), AT)
 
-    @mark_cuda_test
     def test_vector_fill(self):
         a_gpu = gpuarray.GPUArray(100, dtype=gpuarray.vec.float3)
         a_gpu.fill(gpuarray.vec.make_float3(0.0, 0.0, 0.0))
         a = a_gpu.get()
         assert a.dtype == gpuarray.vec.float3
 
-    @mark_cuda_test
     def test_create_complex_zeros(self):
         gpuarray.zeros(3, np.complex64)
 
-    @mark_cuda_test
     def test_reshape(self):
         a = np.arange(128).reshape(8, 16).astype(np.float32)
         a_gpu = gpuarray.to_gpu(a)
@@ -932,7 +885,6 @@ class TestGPUArray:
         a_gpu = a_gpu.reshape((4, 32))
         assert a_gpu.flags.c_contiguous
 
-    @mark_cuda_test
     def test_view(self):
         a = np.arange(128).reshape(8, 16).astype(np.float32)
         a_gpu = gpuarray.to_gpu(a)
@@ -949,7 +901,6 @@ class TestGPUArray:
         view = a_gpu.view(np.int16)
         assert view.shape == (8, 32) and view.dtype == np.int16
 
-    @mark_cuda_test
     def test_squeeze(self):
         shape = (40, 2, 5, 100)
         a_cpu = np.random.random(size=shape)
@@ -966,7 +917,8 @@ class TestGPUArray:
         assert a_gpu_squeezed_slice.flags.c_contiguous
 
         # Check that we get the original values out
-        assert np.all(a_gpu_slice.get().ravel() == a_gpu_squeezed_slice.get().ravel())
+        np.testing.assert_array_equal(a_gpu_slice.get().ravel(),
+                                      a_gpu_squeezed_slice.get().ravel())
 
         # Slice with length 1 on dimensions 2
         a_gpu_slice = a_gpu[:, :, 2:3, :]
@@ -979,9 +931,9 @@ class TestGPUArray:
         assert not a_gpu_squeezed_slice.flags.c_contiguous
 
         # Check that we get the original values out
-        assert np.all(a_gpu_slice.get().ravel() == a_gpu_squeezed_slice.get().ravel())
+        np.testing.assert_array_equal(a_gpu_slice.get().ravel(),
+                                      a_gpu_squeezed_slice.get().ravel())
 
-    @mark_cuda_test
     def test_struct_reduce(self):
         preamble = """
         struct minmax_collector
@@ -1053,7 +1005,6 @@ class TestGPUArray:
         assert minmax["cur_min"] == np.min(a)
         assert minmax["cur_max"] == np.max(a)
 
-    @mark_cuda_test
     def test_reduce_out(self):
         from pycuda.curandom import rand as curand
 
@@ -1071,14 +1022,15 @@ class TestGPUArray:
 
         assert np.alltrue(a.max(axis=1) == max_gpu.get())
 
-    @mark_cuda_test
     def test_sum_allocator(self):
         # FIXME
         from pytest import skip
         skip("https://github.com/inducer/pycuda/issues/163")
-        # crashes with terminate called after throwing an instance of 'pycuda::error'
-        # what(): explicit_context_dependent failed: invalid device context - no currently active context?
+        # crashes with terminate called after throwing an instance
+        # of 'pycuda::error'
+        # what(): explicit_context_dependent failed: invalid device context -
+        # no currently active context?
 
         import pycuda.tools
@@ -1098,7 +1050,6 @@ class TestGPUArray:
         assert b.allocator == a.allocator
         assert c.allocator == pool.allocate
 
-    @mark_cuda_test
     def test_dot_allocator(self):
         # FIXME
         from pytest import skip
@@ -1130,7 +1081,6 @@ class TestGPUArray:
         assert dot_gpu_1.allocator == a_gpu.allocator
         assert dot_gpu_2.allocator == pool.allocate
 
-    @mark_cuda_test
     def test_view_and_strides(self):
         from pycuda.curandom import rand as curand
 
@@ -1143,7 +1093,6 @@ class TestGPUArray:
 
         assert np.array_equal(y.get(), X.get()[:3, :5])
 
-    @mark_cuda_test
     def test_scalar_comparisons(self):
         a = np.array([1.0, 0.25, 0.1, -0.1, 0.0])
         a_gpu = gpuarray.to_gpu(a)
@@ -1164,7 +1113,6 @@ class TestGPUArray:
         x = (a == 1).astype(a.dtype)
         assert (x == x_gpu.get()).all()
 
-    @mark_cuda_test
     def test_minimum_maximum_scalar(self):
         from pycuda.curandom import rand as curand
 
@@ -1180,17 +1128,14 @@ class TestGPUArray:
         assert la.norm(max_a0_gpu.get() - np.maximum(a, 0)) == 0
         assert la.norm(min_a0_gpu.get() - np.minimum(0, a)) == 0
 
-    @mark_cuda_test
     def test_transpose(self):
         from pycuda.curandom import rand as curand
 
         a_gpu = curand((10, 20, 30))
         a = a_gpu.get()
 
-        # assert np.allclose(a_gpu.transpose((1,2,0)).get(), a.transpose((1,2,0))) # not contiguous
         assert np.allclose(a_gpu.T.get(), a.T)
 
-    @mark_cuda_test
     def test_newaxis(self):
         from pycuda.curandom import rand as curand
 
@@ -1203,7 +1148,6 @@ class TestGPUArray:
         assert b_gpu.shape == b.shape
         assert b_gpu.strides == b.strides
 
-    @mark_cuda_test
     def test_copy(self):
         from pycuda.curandom import rand as curand
 
@@ -1242,7 +1186,6 @@ class TestGPUArray:
                 a_gpu.get()[start:stop:step, :, start:stop:step],
             )
 
-    @mark_cuda_test
     def test_get_set(self):
         import pycuda.gpuarray as gpuarray
 
@@ -1256,7 +1199,6 @@ class TestGPUArray:
         assert np.allclose(a_gpu.get(), a)
         assert np.allclose(a_gpu[1:3, 1:3, 1:3].get(), a[1:3, 1:3, 1:3])
 
-    @mark_cuda_test
     def test_zeros_like_etc(self):
         shape = (16, 16)
         a = np.random.randn(*shape).astype(np.float32)
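
Usage note: the new init_cuda_context_fixture helper in pycuda.tools is meant to be driven from a per-module autouse fixture, exactly as test_gpuarray.py does above. Below is a minimal sketch of how another test module could adopt the same pattern; it is not part of the patch, and the module and test names are hypothetical.

    # Hypothetical test module adopting the fixture pattern introduced by this patch.
    import numpy as np
    import pytest

    import pycuda.gpuarray as gpuarray
    from pycuda.tools import init_cuda_context_fixture


    @pytest.fixture(autouse=True)
    def init_cuda_context():
        # Create a CUDA context before each test and tear it down afterwards.
        yield from init_cuda_context_fixture()


    @pytest.mark.cuda
    class TestExample:
        def test_roundtrip(self):
            a = np.arange(16, dtype=np.float32)
            np.testing.assert_array_equal(gpuarray.to_gpu(a).get(), a)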