From 432c1060159eac7536a43f9594196a6c894bd22e Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 28 Jul 2017 17:09:31 -0500
Subject: [PATCH] Make a way to spell literal local barriers

---
 loopy/codegen/control.py    | 42 +++++++++++++++++++++++++++----------
 loopy/kernel/creation.py    |  3 +++
 loopy/kernel/instruction.py |  4 ++--
 test/test_loopy.py          | 18 ++++++++++++++++
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index d206faad5..789c00d33 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -1,8 +1,6 @@
 """Loop nest build top-level control/hoisting."""
 
-from __future__ import division
-from __future__ import absolute_import
-import six
+from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
 
@@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-
+import six
 from loopy.codegen.result import merge_codegen_results, wrap_in_if
 import islpy as isl
 from loopy.schedule import (
         EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel,
         gather_schedule_block, generate_sub_sched_items)
+from loopy.diagnostic import LoopyError
 
 
 def get_admissible_conditional_inames_for(codegen_state, sched_index):
@@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index):
         return func(codegen_state, sched_index)
 
     elif isinstance(sched_item, Barrier):
+        # {{{ emit barrier code
+
+        from loopy.codegen.result import CodeGenerationResult
+
         if codegen_state.is_generating_device_code:
-            return codegen_state.ast_builder.emit_barrier(
+            barrier_ast = codegen_state.ast_builder.emit_barrier(
                     sched_item.kind, sched_item.comment)
-        from loopy.codegen.result import CodeGenerationResult
-        return CodeGenerationResult(
-                host_program=None,
-                device_programs=[],
-                implemented_domains={},
-                implemented_data_info=codegen_state.implemented_data_info)
+            if sched_item.originating_insn_id:
+                return CodeGenerationResult.new(
+                        codegen_state,
+                        sched_item.originating_insn_id,
+                        barrier_ast,
+                        codegen_state.implemented_domain)
+            else:
+                return barrier_ast
+        else:
+            # host code
+            if sched_item.kind in ["global", "local"]:
+                # host code is assumed globally and locally synchronous
+                return CodeGenerationResult(
+                        host_program=None,
+                        device_programs=[],
+                        implemented_domains={},
+                        implemented_data_info=codegen_state.implemented_data_info)
+
+            else:
+                raise LoopyError("do not know how to emit code for barrier kind '%s'"
+                        "in host code" % sched_item.kind)
+
+        # }}}
 
     elif isinstance(sched_item, RunInstruction):
         insn = kernel.id_to_insn[sched_item.insn_id]
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 3bffc8546..89cb5f26a 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options):
     if special_insn_kind == "gbarrier":
         cls = BarrierInstruction
         kwargs["kind"] = "global"
+    elif special_insn_kind == "lbarrier":
+        cls = BarrierInstruction
+        kwargs["kind"] = "local"
     elif special_insn_kind == "nop":
         cls = NoOpInstruction
     else:
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 08268ca9f..d5c388af6 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction):
 
     .. attribute:: kind
 
-        A string, currently only ``"global"``.
+        A string, ``"global"`` or ``"local"``.
 
     The textual syntax in a :mod:`loopy` kernel is::
 
         ... gbarrier
+        ... lbarrier
     """
 
     fields = _DataObliviousInstruction.fields | set(["kind"])
@@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction):
             priority=None,
             boostable=None, boostable_into=None,
             predicates=None, tags=None, kind="global"):
-        assert kind == "global"
 
         if predicates:
             raise LoopyError("conditional barriers are not supported")
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 78633abbd..49679ce5b 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction():
     print(k.stringify(with_dependencies=True))
 
 
+def test_literal_local_barrier(ctx_factory):
+    ctx = ctx_factory()
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            for i
+                ... lbarrier
+            end
+            """, seq_dependencies=True)
+
+    knl = lp.fix_parameters(knl, n=128)
+
+    ref_knl = knl
+
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
+
+
 def test_kernel_splitting(ctx_factory):
     ctx = ctx_factory()
 
-- 
GitLab