From 1e257dfdc9c5793f886b59908061229d1bacdc32 Mon Sep 17 00:00:00 2001
From: arghdos <arghdos@gmail.com>
Date: Thu, 17 Aug 2017 15:22:18 -0400
Subject: [PATCH] add ability to load atomics along the lines of c11 standard

---
 loopy/check.py              |  1 +
 loopy/kernel/instruction.py | 35 +++++++++++++++++++++++++---
 loopy/preprocess.py         | 46 +++++++++++++++++++++++++++++++++++++
 test/test_loopy.py          | 34 +++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/loopy/check.py b/loopy/check.py
index e72f9e3e6..8d78e684b 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -493,6 +493,7 @@ def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel):
 
         accessed_atomic_vars = insn.dependency_names() & atomicity_candidates
         if not accessed_atomic_vars <= atomic_accesses:
+            missed = atomic_accesses - accessed_atomic_vars
             raise LoopyError("atomic variable(s) '%s' in instruction '%s' "
                     "used in non-atomic access"
                     % (
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index d5c388af6..1d20ff71c 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -603,7 +603,7 @@ class AtomicInit(VarAtomicity):
                 memory_scope.to_string(self.scope))
 
 
-class AtomicUpdate(VarAtomicity):
+class OrderedAtomic(VarAtomicity):
     """Properties of an atomic operation. A subclass of :class:`VarAtomicity`.
 
     .. attribute:: ordering
@@ -624,7 +624,7 @@ class AtomicUpdate(VarAtomicity):
         """
 
-        super(AtomicUpdate, self).update_persistent_hash(key_hash, key_builder)
-        key_builder.rec(key_hash, "AtomicUpdate")
+        super(OrderedAtomic, self).update_persistent_hash(key_hash, key_builder)
+        key_builder.rec(key_hash, str(self.__class__.__name__))
         key_builder.rec(key_hash, self.ordering)
         key_builder.rec(key_hash, self.scope)
 
@@ -634,11 +634,40 @@ class AtomicUpdate(VarAtomicity):
                 and self.scope == other.scope)
 
     def __str__(self):
-        return "update[%s]%s/%s" % (
+        return "%s[%s]%s/%s" % (
+                self.op_name,
                 self.var_name,
                 memory_ordering.to_string(self.ordering),
                 memory_scope.to_string(self.scope))
 
+
+class AtomicUpdate(OrderedAtomic):
+    """Properties of an atomic update. A subclass of :class:`OrderedAtomic`.
+
+    .. attribute:: ordering
+
+        One of the values from :class:`memory_ordering`
+
+    .. attribute:: scope
+
+        One of the values from :class:`memory_scope`
+    """
+    op_name = 'update'  # prefix printed by OrderedAtomic.__str__
+
+
+class AtomicLoad(OrderedAtomic):
+    """Properties of an atomic load. A subclass of :class:`OrderedAtomic`.
+
+    .. attribute:: ordering
+
+        One of the values from :class:`memory_ordering`
+
+    .. attribute:: scope
+
+        One of the values from :class:`memory_scope`
+    """
+    op_name = 'load'  # prefix printed by OrderedAtomic.__str__
+
 # }}}
 
 
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index ced1aaaa1..554bf6b24 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -2020,6 +2020,48 @@ def limit_boostability(kernel):
 # }}}
 
 
+# {{{ check for loads of atomic variables
+
+def check_atomic_loads(kernel):
+    """Add :class:`AtomicLoad` markers for unannotated atomic variables.
+
+    Returns a copy of *kernel* in which every :class:`Assignment` depending
+    on an atomic variable absent from its ``atomicity`` gains an
+    :class:`AtomicLoad` entry for that variable.
+    """
+
+    logger.debug("%s: check atomic loads" % kernel.name)
+    from loopy.types import AtomicType
+    from loopy.kernel.array import ArrayBase
+    from loopy.kernel.instruction import Assignment, AtomicLoad
+
+    # names of all temporaries and array arguments with an atomic dtype
+    atomicity_candidates = (
+            set(v.name for v in six.itervalues(kernel.temporary_variables)
+                if isinstance(v.dtype, AtomicType))
+            |
+            set(v.name for v in kernel.args
+                if isinstance(v, ArrayBase)
+                and isinstance(v.dtype, AtomicType)))
+
+    new_insns = []
+    for insn in kernel.instructions:
+        if isinstance(insn, Assignment):
+            atomic_accesses = set(a.var_name for a in insn.atomicity)
+            accessed_atomic_vars = insn.dependency_names() & atomicity_candidates
+            # set order is arbitrary; sort so the atomicity tuple is reproducible
+            missed = sorted(accessed_atomic_vars - atomic_accesses)
+            if missed:
+                insn = insn.copy(atomicity=insn.atomicity + tuple(
+                    AtomicLoad(name) for name in missed))
+
+        new_insns.append(insn)
+
+    return kernel.copy(instructions=new_insns)
+
+# }}}
+
+
 preprocess_cache = PersistentDict("loopy-preprocess-cache-v2-"+DATA_MODEL_VERSION,
         key_builder=LoopyKeyBuilder())
 
@@ -2104,6 +2146,10 @@ def preprocess_kernel(kernel, device=None):
     kernel = find_idempotence(kernel)
     kernel = limit_boostability(kernel)
 
+    # check for atomic loads, much easier to do here now that the dependencies
+    # have been established
+    kernel = check_atomic_loads(kernel)
+
     kernel = kernel.target.preprocess(kernel)
 
     logger.info("%s: preprocess done" % kernel.name)
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 3593019ad..f767197b0 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1011,6 +1011,40 @@ def test_atomic(ctx_factory, dtype):
     lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000))
 
 
+def test_atomic_load(ctx_factory):
+    """Test an atomic update whose right-hand side reads an atomic array.
+
+    ``temp`` is declared ``for_atomic`` and is read on the right-hand side
+    of the update to ``out``. The split kernel is compared against the
+    unsplit one via ``lp.auto_test_vs_ref``.
+    """
+    # A 32-bit dtype needs neither a 64-bit atomics extension check nor an
+    # int64 RNG version check, so no skip guards are required here.
+    dtype = np.int32
+    ctx = ctx_factory()
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            temp[0] = 5 {id=init, atomic}
+            out[i%20] = out[i%20] + temp[0] {dep=init, nosync=init, atomic}
+            """,
+            [
+                lp.GlobalArg("out", dtype, shape=lp.auto, for_atomic=True),
+                lp.GlobalArg("temp", dtype, shape=lp.auto, for_atomic=True),
+                "..."
+                ],
+            assumptions="n>0")
+
+    # keep the untransformed kernel as the reference implementation
+    ref_knl = knl
+
+    # transformed variant: split ``i`` by 512, then ``i_inner`` by 128
+    knl = lp.split_iname(knl, "i", 512)
+    knl = lp.split_iname(knl, "i_inner", 128, outer_tag="unr", inner_tag="g.0")
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=10000))
+
+
 def test_within_inames_and_reduction():
     # See https://github.com/inducer/loopy/issues/24
 
-- 
GitLab