diff --git a/test/test_loopy.py b/test/test_loopy.py index 524ba45f2caa2035789b85fca41870efa3d108eb..927f5a475f2cc11f2d74876676947765be40f010 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1062,12 +1062,12 @@ def test_atomic(ctx_factory, dtype): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000)) -def test_atomic_load(ctx_factory): - dtype = np.float32 +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +def test_atomic_load(ctx_factory, dtype): ctx = ctx_factory() queue = cl.CommandQueue(ctx) from loopy.kernel.data import temp_var_scope as scopes - n = 100 + n = 10 vec_width = 4 if ( @@ -1082,21 +1082,20 @@ def test_atomic_load(ctx_factory): pytest.skip("int64 RNG not supported in PyOpenCL < 2015.2") knl = lp.make_kernel( - "{ [i,j]: 0<=i,j<100 }", + "{ [i,j]: 0<=i,j upper = 0 <> lower = 0 - temp[0] = 0 {id=init, atomic} + temp = 0 {id=init, atomic} for i upper = upper + i * a[i] {id=sum0} lower = lower - b[i] {id=sum1} end - ... lbarrier {id=lb1, dep=sum1:init} - temp[0] = temp[0] + lower {id=temp_sum, dep=sum*:lb1:init, atomic,\ + temp = temp + lower {id=temp_sum, dep=sum*:init, atomic,\ nosync=init} ... lbarrier {id=lb2, dep=temp_sum} - out[j] = upper / temp[0] {id=final, dep=sum*:temp_sum:lb2, atomic,\ + out[j] = upper / temp {id=final, dep=lb2, atomic,\ nosync=init:temp_sum} end """, @@ -1105,18 +1104,18 @@ def test_atomic_load(ctx_factory): lp.GlobalArg("a", dtype, shape=lp.auto), lp.GlobalArg("b", dtype, shape=lp.auto), lp.TemporaryVariable('temp', dtype, for_atomic=True, - scope=scopes.LOCAL, shape=(vec_width,)), + scope=scopes.LOCAL), "..." ], silenced_warnings=["write_race(init)", "write_race(temp_sum)"]) - + knl = lp.fix_parameters(knl, n=n) knl = lp.split_iname(knl, "j", vec_width, inner_tag="l.0") _, out = knl(queue, a=np.arange(n, dtype=dtype), b=np.arange(n, dtype=dtype)) - assert np.allclose(out, np.full_like(out, (-(2 * n - 1) / float(3 * vec_width)))) + assert np.allclose(out, np.full_like(out, ((1 - 2 * n) / 3.0))) -def test_atomic_init(): - dtype = np.float32 +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +def test_atomic_init(dtype): vec_width = 4 knl = lp.make_kernel(