diff --git a/loopy/check.py b/loopy/check.py index 8d78e684bc0d793d6c17bbfebefc428d71fbb0b4..e72f9e3e6c4db797220729a5f282d4944b31d6ac 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -493,7 +493,6 @@ def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel): accessed_atomic_vars = insn.dependency_names() & atomicity_candidates if not accessed_atomic_vars <= atomic_accesses: - missed = atomic_accesses - accessed_atomic_vars raise LoopyError("atomic variable(s) '%s' in instruction '%s' " "used in non-atomic access" % ( diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 554bf6b24bfcd8e3d3db8e40dd6535d19984ae8c..b67262e6b6bdb35c711a4de31b13a06199e75a26 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -2046,7 +2046,8 @@ def check_atomic_loads(kernel): if isinstance(insn, Assignment): # look for atomic variables atomic_accesses = set(a.var_name for a in insn.atomicity) - accessed_atomic_vars = insn.dependency_names() & atomicity_candidates + accessed_atomic_vars = (insn.dependency_names() & atomicity_candidates)\ + - set([insn.assignee_var_names()[0]]) if not accessed_atomic_vars <= atomic_accesses: #if we're missing some missed = accessed_atomic_vars - atomic_accesses diff --git a/test/test_loopy.py b/test/test_loopy.py index f767197b09e2cddebf5e7b1eb9fc25da23e17746..1bdad6b763e3a8b19a0dc841d222eb6381ab73b6 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1014,6 +1014,9 @@ def test_atomic(ctx_factory, dtype): def test_atomic_load(ctx_factory): dtype = np.int32 ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + from loopy.kernel.data import temp_var_scope as scopes + n = 100 if ( np.dtype(dtype).itemsize == 8 @@ -1027,22 +1030,33 @@ def test_atomic_load(ctx_factory): pytest.skip("int64 RNG not supported in PyOpenCL < 2015.2") knl = lp.make_kernel( - "{ [i]: 0<=i upper = 0 + <> lower = 0 + temp[0] = 0 {id=init, atomic} + for i + upper = upper + i * a[i] {id=sum0} + lower = lower - b[i] {id=sum1} + end + ... lbarrier {id=lb1, dep=sum1} + temp[0] = temp[0] + lower {id=temp_sum, dep=sum*:lb1:init, atomic} + ... lbarrier {id=lb2, dep=temp_sum} + out[j] = upper / temp[0] {dep=sum*:temp_sum:lb2, atomic} + end """, [ lp.GlobalArg("out", dtype, shape=lp.auto, for_atomic=True), - lp.GlobalArg('temp', dtype, shape=lp.auto, for_atomic=True), + lp.GlobalArg("a", dtype, shape=lp.auto), + lp.GlobalArg("b", dtype, shape=lp.auto), + lp.TemporaryVariable('temp', dtype, for_atomic=True, + scope=scopes.GLOBAL, shape=(1,)), "..." - ], - assumptions="n>0") + ]) - ref_knl = knl - knl = lp.split_iname(knl, "i", 512) - knl = lp.split_iname(knl, "i_inner", 128, outer_tag="unr", inner_tag="g.0") - lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000)) + knl = lp.split_iname(knl, "j", 512, inner_tag="l.0") + _, out = knl(queue, a=np.arange(n), b=np.arange(n)) def test_within_inames_and_reduction():