Add support for literal local barriers ("... lbarrier").

  * loopy/codegen/control.py: in device code, the emitted barrier is wrapped
    in a CodeGenerationResult when the schedule item carries an
    originating_insn_id, and returned bare otherwise.  In host code,
    "global" and "local" barriers yield an empty CodeGenerationResult and
    any other barrier kind raises LoopyError.
  * loopy/kernel/creation.py: parse "lbarrier" as a BarrierInstruction with
    kind="local".
  * loopy/kernel/instruction.py: document the "local" kind and drop the
    assertion that restricted kind to "global".
  * test/test_loopy.py: add test_literal_local_barrier.

Review fix: the host-code LoopyError message was missing the space between
its two concatenated string literals ("...kind '%s'" "in host code"), so it
rendered as "barrier kind 'X'in host code".  The space is added at the start
of the continuation literal to keep the first line at its original length.

NOTE(review): indentation and blank context lines were restored from the
hunk line counts; confirm against the target tree before applying.  The
"index" blob id for loopy/codegen/control.py predates the message fix.

diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index d206faad5bd84e3a1c7e7c061673f3d5d1144c84..789c00d33b7bb41816e6901e24046d4b0eefb27d 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -1,8 +1,6 @@
 """Loop nest build top-level control/hoisting."""
 
-from __future__ import division
-from __future__ import absolute_import
-import six
+from __future__ import division, absolute_import
 
 __copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
 
@@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
-
+import six
 from loopy.codegen.result import merge_codegen_results, wrap_in_if
 import islpy as isl
 from loopy.schedule import (
         EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel,
         gather_schedule_block, generate_sub_sched_items)
+from loopy.diagnostic import LoopyError
 
 
 def get_admissible_conditional_inames_for(codegen_state, sched_index):
@@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index):
         return func(codegen_state, sched_index)
 
     elif isinstance(sched_item, Barrier):
+        # {{{ emit barrier code
+
+        from loopy.codegen.result import CodeGenerationResult
+
         if codegen_state.is_generating_device_code:
-            return codegen_state.ast_builder.emit_barrier(
+            barrier_ast = codegen_state.ast_builder.emit_barrier(
                     sched_item.kind, sched_item.comment)
-        from loopy.codegen.result import CodeGenerationResult
-        return CodeGenerationResult(
-                host_program=None,
-                device_programs=[],
-                implemented_domains={},
-                implemented_data_info=codegen_state.implemented_data_info)
+            if sched_item.originating_insn_id:
+                return CodeGenerationResult.new(
+                        codegen_state,
+                        sched_item.originating_insn_id,
+                        barrier_ast,
+                        codegen_state.implemented_domain)
+            else:
+                return barrier_ast
+        else:
+            # host code
+            if sched_item.kind in ["global", "local"]:
+                # host code is assumed globally and locally synchronous
+                return CodeGenerationResult(
+                        host_program=None,
+                        device_programs=[],
+                        implemented_domains={},
+                        implemented_data_info=codegen_state.implemented_data_info)
+
+            else:
+                raise LoopyError("do not know how to emit code for barrier kind '%s'"
+                        " in host code" % sched_item.kind)
+
+        # }}}
 
     elif isinstance(sched_item, RunInstruction):
         insn = kernel.id_to_insn[sched_item.insn_id]
diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py
index 3bffc854603e9d02d0c1b68c05da609aacda6e55..89cb5f26a4940656cca1ab09841311148e113275 100644
--- a/loopy/kernel/creation.py
+++ b/loopy/kernel/creation.py
@@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options):
     if special_insn_kind == "gbarrier":
         cls = BarrierInstruction
         kwargs["kind"] = "global"
+    elif special_insn_kind == "lbarrier":
+        cls = BarrierInstruction
+        kwargs["kind"] = "local"
     elif special_insn_kind == "nop":
         cls = NoOpInstruction
     else:
diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py
index 08268ca9f27623a6d17a195d3c04acb55e5ec68a..d5c388af60a39987c09092fc93325f067a8f4cf7 100644
--- a/loopy/kernel/instruction.py
+++ b/loopy/kernel/instruction.py
@@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction):
 
     .. attribute:: kind
 
-        A string, currently only ``"global"``.
+        A string, ``"global"`` or ``"local"``.
 
     The textual syntax in a :mod:`loopy` kernel is::
 
         ... gbarrier
+        ... lbarrier
     """
 
     fields = _DataObliviousInstruction.fields | set(["kind"])
@@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction):
             priority=None, boostable=None, boostable_into=None,
             predicates=None, tags=None, kind="global"):
 
-        assert kind == "global"
         if predicates:
             raise LoopyError("conditional barriers are not supported")
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 78633abbd41408ae700aa8516e8a9c6f70f018a9..49679ce5b40fdc36dd3cea894784188d8743b936 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction():
     print(k.stringify(with_dependencies=True))
 
 
+def test_literal_local_barrier(ctx_factory):
+    ctx = ctx_factory()
+
+    knl = lp.make_kernel(
+            "{ [i]: 0<=i<n }",
+            """
+            for i
+                ... lbarrier
+            end
+            """, seq_dependencies=True)
+
+    knl = lp.fix_parameters(knl, n=128)
+
+    ref_knl = knl
+
+    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5))
+
+
 def test_kernel_splitting(ctx_factory):
     ctx = ctx_factory()
 