From 432c1060159eac7536a43f9594196a6c894bd22e Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 28 Jul 2017 17:09:31 -0500
Subject: [PATCH] Make a way to spell literal local barriers

---
Note (review): relative to the commit named in the From line, the only
content change is in loopy/codegen/control.py: the LoopyError message for
unsupported host-side barrier kinds gained a separating space before
"in host code" (the adjacent string literals previously rendered as
"...barrier kind '<kind>'in host code"). Hunk line counts are unchanged;
the blob ids on the "index" lines still refer to the original commit.

 loopy/codegen/control.py    | 42 +++++++++++++++++++++++++++----------
 loopy/kernel/creation.py    |  3 +++
 loopy/kernel/instruction.py |  4 ++--
 test/test_loopy.py          | 18 ++++++++++++++++
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index d206faad5..789c00d33 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -1,8 +1,6 @@
 """Loop nest build top-level control/hoisting."""
 
-from __future__ import division
-from __future__ import absolute_import
-import six
+from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
 
@@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
-
+import six
 from loopy.codegen.result import merge_codegen_results, wrap_in_if
 import islpy as isl
 from loopy.schedule import (
         EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel,
         gather_schedule_block, generate_sub_sched_items)
+from loopy.diagnostic import LoopyError
 
 
 def get_admissible_conditional_inames_for(codegen_state, sched_index):
@@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index):
         return func(codegen_state, sched_index)
 
     elif isinstance(sched_item, Barrier):
+        # {{{ emit barrier code
+
+        from loopy.codegen.result import CodeGenerationResult
+
         if codegen_state.is_generating_device_code:
-            return codegen_state.ast_builder.emit_barrier(
+            barrier_ast = codegen_state.ast_builder.emit_barrier(
                     sched_item.kind, sched_item.comment)
-        from loopy.codegen.result import CodeGenerationResult
-        return CodeGenerationResult(
-                host_program=None,
-                device_programs=[],
-                implemented_domains={},
-                implemented_data_info=codegen_state.implemented_data_info)
+            if sched_item.originating_insn_id:
+                return CodeGenerationResult.new(
+                        codegen_state,
+                        sched_item.originating_insn_id,
+                        barrier_ast,
+                        codegen_state.implemented_domain)
+            else:
+                return barrier_ast
+        else:
+            # host code
+            if sched_item.kind in ["global", "local"]:
+                # host code is assumed globally and locally synchronous
+                return CodeGenerationResult(
+                        host_program=None,
+                        device_programs=[],
+                        implemented_domains={},
+                        implemented_data_info=codegen_state.implemented_data_info)
+
+            else:
+                raise LoopyError("do not know how to emit code for barrier kind '%s'"
+                        " in host code" % sched_item.kind)
+
+        # }}}
 
     elif isinstance(sched_item, RunInstruction):
         insn = kernel.id_to_insn[sched_item.insn_id]
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 3bffc8546..89cb5f26a 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options):
     if special_insn_kind == "gbarrier":
         cls = BarrierInstruction
         kwargs["kind"] = "global"
+    elif special_insn_kind == "lbarrier":
+        cls = BarrierInstruction
+        kwargs["kind"] = "local"
     elif special_insn_kind == "nop":
         cls = NoOpInstruction
     else:
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 08268ca9f..d5c388af6 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction):
 
     .. attribute:: kind
 
-        A string, currently only ``"global"``.
+        A string, ``"global"`` or ``"local"``.
 
     The textual syntax in a :mod:`loopy` kernel is::
 
         ... gbarrier
+        ... lbarrier
     """
 
     fields = _DataObliviousInstruction.fields | set(["kind"])
@@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction):
             priority=None,
             boostable=None, boostable_into=None,
             predicates=None, tags=None, kind="global"):
-        assert kind == "global"
 
         if predicates:
             raise LoopyError("conditional barriers are not supported")
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 78633abbd..49679ce5b 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction():
     print(k.stringify(with_dependencies=True))
 
 
+def test_literal_local_barrier(ctx_factory):
+    ctx = ctx_factory()
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            for i
+                ... lbarrier
+            end
+            """, seq_dependencies=True)
+
+    knl = lp.fix_parameters(knl, n=128)
+
+    ref_knl = knl
+
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
+
+
 def test_kernel_splitting(ctx_factory):
     ctx = ctx_factory()
 
-- 
GitLab