diff --git a/test/test_loopy.py b/test/test_loopy.py
index 524ba45f2caa2035789b85fca41870efa3d108eb..927f5a475f2cc11f2d74876676947765be40f010 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1062,12 +1062,12 @@ def test_atomic(ctx_factory, dtype):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000))
 
 
-def test_atomic_load(ctx_factory):
-    dtype = np.float32
+@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64])
+def test_atomic_load(ctx_factory, dtype):
     ctx = ctx_factory()
     queue = cl.CommandQueue(ctx)
     from loopy.kernel.data import temp_var_scope as scopes
-    n = 100
+    n = 10
     vec_width = 4
 
     if (
@@ -1082,21 +1082,20 @@ def test_atomic_load(ctx_factory):
         pytest.skip("int64 RNG not supported in PyOpenCL < 2015.2")
 
     knl = lp.make_kernel(
-            "{ [i,j]: 0<=i,j<100 }",
+            "{ [i,j]: 0<=i,j<n}",
             """
             for j
                 <> upper = 0
                 <> lower = 0
-                temp[0] = 0 {id=init, atomic}
+                temp = 0 {id=init, atomic}
                 for i
                     upper = upper + i * a[i] {id=sum0}
                     lower = lower - b[i] {id=sum1}
                 end
-                ... lbarrier {id=lb1, dep=sum1:init}
-                temp[0] = temp[0] + lower {id=temp_sum, dep=sum*:lb1:init, atomic,\
+                temp = temp + lower {id=temp_sum, dep=sum*:init, atomic,\
                                            nosync=init}
                 ... lbarrier {id=lb2, dep=temp_sum}
-                out[j] = upper / temp[0] {id=final, dep=sum*:temp_sum:lb2, atomic,\
+                out[j] = upper / temp {id=final, dep=lb2, atomic,\
                                            nosync=init:temp_sum}
             end
             """,
@@ -1105,18 +1104,18 @@ def test_atomic_load(ctx_factory):
                 lp.GlobalArg("a", dtype, shape=lp.auto),
                 lp.GlobalArg("b", dtype, shape=lp.auto),
                 lp.TemporaryVariable('temp', dtype, for_atomic=True,
-                                     scope=scopes.LOCAL, shape=(vec_width,)),
+                                     scope=scopes.LOCAL),
                 "..."
                 ],
             silenced_warnings=["write_race(init)", "write_race(temp_sum)"])
-
+    knl = lp.fix_parameters(knl, n=n)
     knl = lp.split_iname(knl, "j", vec_width, inner_tag="l.0")
     _, out = knl(queue, a=np.arange(n, dtype=dtype), b=np.arange(n, dtype=dtype))
-    assert np.allclose(out, np.full_like(out, (-(2 * n - 1) / float(3 * vec_width))))
+    assert np.allclose(out, np.full_like(out, ((1 - 2 * n) / 3.0)))
 
 
-def test_atomic_init():
-    dtype = np.float32
+@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64])
+def test_atomic_init(dtype):
     vec_width = 4
 
     knl = lp.make_kernel(