diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index d5c388af60a39987c09092fc93325f067a8f4cf7..1d20ff71c3f13035d2f01b56a69e2d983c7660af 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -603,7 +603,7 @@ class AtomicInit(VarAtomicity):
                 memory_scope.to_string(self.scope))
 
 
-class AtomicUpdate(VarAtomicity):
+class OrderedAtomic(VarAtomicity):
     """Properties of an atomic operation. A subclass of :class:`VarAtomicity`.
 
     .. attribute:: ordering
@@ -624,7 +624,7 @@ class AtomicUpdate(VarAtomicity):
         """
 
-        super(AtomicUpdate, self).update_persistent_hash(key_hash, key_builder)
-        key_builder.rec(key_hash, "AtomicUpdate")
+        super(OrderedAtomic, self).update_persistent_hash(key_hash, key_builder)
+        key_builder.rec(key_hash, str(self.__class__.__name__))
         key_builder.rec(key_hash, self.ordering)
         key_builder.rec(key_hash, self.scope)
 
@@ -634,11 +634,40 @@ class AtomicUpdate(VarAtomicity):
                 and self.scope == other.scope)
 
     def __str__(self):
-        return "update[%s]%s/%s" % (
+        return "%s[%s]%s/%s" % (
+                self.op_name,
                 self.var_name,
                 memory_ordering.to_string(self.ordering),
                 memory_scope.to_string(self.scope))
 
+
+class AtomicUpdate(OrderedAtomic):
+    """Properties of an atomic update. A subclass of :class:`OrderedAtomic`.
+
+    .. attribute:: ordering
+
+        One of the values from :class:`memory_ordering`
+
+    .. attribute:: scope
+
+        One of the values from :class:`memory_scope`
+    """
+    op_name = 'update'
+
+
+class AtomicLoad(OrderedAtomic):
+    """Properties of an atomic load. A subclass of :class:`OrderedAtomic`.
+
+    .. attribute:: ordering
+
+        One of the values from :class:`memory_ordering`
+
+    .. attribute:: scope
+
+        One of the values from :class:`memory_scope`
+    """
+    op_name = 'load'
+
 # }}}
 
 
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index ced1aaaa13ed8275c1e3a376d1c24895287b3239..554bf6b24bfcd8e3d3db8e40dd6535d19984ae8c 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -2020,6 +2020,55 @@ def limit_boostability(kernel):
 # }}}
 
 
+# {{{ check for loads of atomic variables
+
+def check_atomic_loads(kernel):
+    """Annotate unmarked reads of atomic variables as atomic loads.
+
+    Every :class:`loopy.kernel.instruction.Assignment` whose dependencies
+    include a variable of atomic type that is not listed in the
+    instruction's ``atomicity`` gets an
+    :class:`loopy.kernel.instruction.AtomicLoad` entry for that variable.
+
+    :returns: a copy of *kernel* with the updated instructions.
+    """
+
+    logger.debug("%s: check atomic loads" % kernel.name)
+    from loopy.types import AtomicType
+    from loopy.kernel.array import ArrayBase
+    from loopy.kernel.instruction import Assignment, AtomicLoad
+
+    # find atomic variables
+    atomicity_candidates = (
+            set(v.name for v in six.itervalues(kernel.temporary_variables)
+                if isinstance(v.dtype, AtomicType))
+            |
+            set(v.name for v in kernel.args
+                if isinstance(v, ArrayBase)
+                and isinstance(v.dtype, AtomicType)))
+
+    new_insns = []
+    for insn in kernel.instructions:
+        if isinstance(insn, Assignment):
+            # atomic variables this instruction depends on that carry no
+            # atomicity annotation yet
+            atomic_accesses = set(a.var_name for a in insn.atomicity)
+            accessed_atomic_vars = insn.dependency_names() & atomicity_candidates
+            missed = accessed_atomic_vars - atomic_accesses
+            if missed:
+                # sorted() keeps the order of the added annotations
+                # independent of set iteration order
+                insn = insn.copy(
+                        atomicity=insn.atomicity + tuple(
+                            AtomicLoad(x) for x in sorted(missed)))
+
+        new_insns.append(insn)
+
+    return kernel.copy(instructions=new_insns)
+
+# }}}
+
+
 preprocess_cache = PersistentDict("loopy-preprocess-cache-v2-"+DATA_MODEL_VERSION,
         key_builder=LoopyKeyBuilder())
 
@@ -2104,6 +2153,10 @@ def preprocess_kernel(kernel, device=None):
     kernel = find_idempotence(kernel)
     kernel = limit_boostability(kernel)
 
+    # check for atomic loads, much easier to do here now that the dependencies
+    # have been established
+    kernel = check_atomic_loads(kernel)
+
     kernel = kernel.target.preprocess(kernel)
 
     logger.info("%s: preprocess done" % kernel.name)
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 3593019ad2ca7e41f7db4c95616184e1e8972125..f767197b09e2cddebf5e7b1eb9fc25da23e17746 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1011,6 +1011,40 @@ def test_atomic(ctx_factory, dtype):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000))
 
 
+def test_atomic_load(ctx_factory):
+    dtype = np.int32
+    ctx = ctx_factory()
+
+    if (
+            np.dtype(dtype).itemsize == 8
+            and "cl_khr_int64_base_atomics" not in ctx.devices[0].extensions):
+        pytest.skip("64-bit atomics not supported on device")
+
+    import pyopencl.version  # noqa
+    if (
+            cl.version.VERSION < (2015, 2)
+            and dtype == np.int64):
+        pytest.skip("int64 RNG not supported in PyOpenCL < 2015.2")
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            temp[0] = 5 {id=init, atomic}
+            out[i%20] = out[i%20] + temp[0] {dep=init, nosync=init, atomic}
+            """,
+            [
+                lp.GlobalArg("out", dtype, shape=lp.auto, for_atomic=True),
+                lp.GlobalArg('temp', dtype, shape=lp.auto, for_atomic=True),
+                "..."
+                ],
+            assumptions="n>0")
+
+    ref_knl = knl
+    knl = lp.split_iname(knl, "i", 512)
+    knl = lp.split_iname(knl, "i_inner", 128, outer_tag="unr", inner_tag="g.0")
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000))
+
+
 def test_within_inames_and_reduction():
     # See https://github.com/inducer/loopy/issues/24
 