From 52932e9e16dfce6a597c23ef8b7ec0bdaa00fc57 Mon Sep 17 00:00:00 2001
From: arghdos <arghdos@gmail.com>
Date: Fri, 18 Aug 2017 10:23:12 -0400
Subject: [PATCH] Fix test_atomic_load: local temp, nosync, check result

---
 test/test_loopy.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/test_loopy.py b/test/test_loopy.py
index 1bdad6b76..0a3ce11f4 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1017,6 +1017,7 @@ def test_atomic_load(ctx_factory):
     queue = cl.CommandQueue(ctx)
     from loopy.kernel.data import temp_var_scope as scopes
     n = 100
+    vec_width = 4
 
     if (
             np.dtype(dtype).itemsize == 8
@@ -1041,9 +1042,11 @@ def test_atomic_load(ctx_factory):
                     lower = lower - b[i] {id=sum1}
                 end
                 ... lbarrier {id=lb1, dep=sum1}
-                temp[0] = temp[0] + lower {id=temp_sum, dep=sum*:lb1:init, atomic}
+                temp[0] = temp[0] + lower {id=temp_sum, dep=sum*:lb1:init, atomic,\
+                                           nosync=init}
                 ... lbarrier {id=lb2, dep=temp_sum}
-                out[j] = upper / temp[0] {dep=sum*:temp_sum:lb2, atomic}
+                out[j] = upper / temp[0] {id=final, dep=sum*:temp_sum:lb2, atomic,\
+                                           nosync=init:temp_sum}
             end
             """,
             [
@@ -1051,12 +1054,13 @@ def test_atomic_load(ctx_factory):
                 lp.GlobalArg("a", dtype, shape=lp.auto),
                 lp.GlobalArg("b", dtype, shape=lp.auto),
                 lp.TemporaryVariable('temp', dtype, for_atomic=True,
-                                     scope=scopes.GLOBAL, shape=(1,)),
+                                     scope=scopes.LOCAL, shape=(1,)),
                 "..."
                 ])
 
-    knl = lp.split_iname(knl, "j", 512, inner_tag="l.0")
-    _, out = knl(queue, a=np.arange(n), b=np.arange(n))
+    knl = lp.split_iname(knl, "j", vec_width, inner_tag="l.0")
+    _, out = knl(queue, a=np.arange(n, dtype=dtype), b=np.arange(n, dtype=dtype))
+    assert np.allclose(out, np.full_like(out, (-(2 * n - 1) / (3 * vec_width))))
 
 
 def test_within_inames_and_reduction():
-- 
GitLab