diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index d206faad5bd84e3a1c7e7c061673f3d5d1144c84..789c00d33b7bb41816e6901e24046d4b0eefb27d 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -1,8 +1,6 @@ """Loop nest build top-level control/hoisting.""" -from __future__ import division -from __future__ import absolute_import -import six +from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - +import six from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.schedule import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, gather_schedule_block, generate_sub_sched_items) +from loopy.diagnostic import LoopyError def get_admissible_conditional_inames_for(codegen_state, sched_index): @@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index): return func(codegen_state, sched_index) elif isinstance(sched_item, Barrier): + # {{{ emit barrier code + + from loopy.codegen.result import CodeGenerationResult + if codegen_state.is_generating_device_code: - return codegen_state.ast_builder.emit_barrier( + barrier_ast = codegen_state.ast_builder.emit_barrier( sched_item.kind, sched_item.comment) - from loopy.codegen.result import CodeGenerationResult - return CodeGenerationResult( - host_program=None, - device_programs=[], - implemented_domains={}, - implemented_data_info=codegen_state.implemented_data_info) + if sched_item.originating_insn_id: + return CodeGenerationResult.new( + codegen_state, + sched_item.originating_insn_id, + barrier_ast, + codegen_state.implemented_domain) + else: + return barrier_ast + else: + # host code + if sched_item.kind in ["global", "local"]: + # host code is assumed globally and locally synchronous + return CodeGenerationResult( + host_program=None, + device_programs=[], + 
implemented_domains={}, + implemented_data_info=codegen_state.implemented_data_info) + + else: + raise LoopyError("do not know how to emit code for barrier kind '%s' " + "in host code" % sched_item.kind) + + # }}} elif isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 3bffc854603e9d02d0c1b68c05da609aacda6e55..89cb5f26a4940656cca1ab09841311148e113275 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options): if special_insn_kind == "gbarrier": cls = BarrierInstruction kwargs["kind"] = "global" + elif special_insn_kind == "lbarrier": + cls = BarrierInstruction + kwargs["kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction else: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 08268ca9f27623a6d17a195d3c04acb55e5ec68a..d5c388af60a39987c09092fc93325f067a8f4cf7 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction): .. attribute:: kind - A string, currently only ``"global"``. + A string, ``"global"`` or ``"local"``. The textual syntax in a :mod:`loopy` kernel is:: ... gbarrier + ... 
lbarrier """ fields = _DataObliviousInstruction.fields | set(["kind"]) @@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction): priority=None, boostable=None, boostable_into=None, predicates=None, tags=None, kind="global"): - assert kind == "global" if predicates: raise LoopyError("conditional barriers are not supported") diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index f27ae49e80bc19989cf6e4e002c03ebb7a637b78..4281e50bd006a3cddf5a3cae0ffffe3d78abcfac 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -732,13 +732,15 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if there are pending local barriers in the preschedule + # {{{ see if there are pending barriers in the preschedule - # Local barriers do not have associated instructions, so they need to - # be handled separately from instructions. + # Barriers that do not have an originating instruction are handled here. + # (These are automatically inserted by insert_barriers().) Barriers with + # originating instructions are handled as part of normal instruction + # scheduling below. 
if ( isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.kind == "local"): + and next_preschedule_item.originating_insn_id is None): for result in generate_loop_schedules_internal( sched_state.copy( schedule=sched_state.schedule + (next_preschedule_item,), @@ -814,10 +816,7 @@ def generate_loop_schedules_internal( if insn_id in sched_state.prescheduled_insn_ids: if isinstance(next_preschedule_item, RunInstruction): next_preschedule_insn_id = next_preschedule_item.insn_id - elif ( - isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.kind == "global"): - assert hasattr(next_preschedule_item, "originating_insn_id") + elif isinstance(next_preschedule_item, Barrier): assert next_preschedule_item.originating_insn_id is not None next_preschedule_insn_id = next_preschedule_item.originating_insn_id else: @@ -1606,7 +1605,10 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): comment = "for %s (%s)" % ( dep.variable, dep.dep_descr.format( tgt=dep.target.id, src=dep.source.id)) - schedule.append(Barrier(comment=comment, kind=dep.var_kind)) + schedule.append(Barrier( + comment=comment, + kind=dep.var_kind, + originating_insn_id=None)) def insert_barriers(kernel, schedule, kind, verify_only, level=0): diff --git a/test/test_loopy.py b/test/test_loopy.py index 78633abbd41408ae700aa8516e8a9c6f70f018a9..3593019ad2ca7e41f7db4c95616184e1e8972125 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction(): print(k.stringify(with_dependencies=True)) +def test_literal_local_barrier(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{ [i]: 0<=it[2*j] = j + ... lbarrier + t[2*j+1] = t[2*j] + ... 
gbarrier + out[i] = t[2*i] + end + """, seq_dependencies=True) + + knl = lp.set_temporary_scope(knl, "t", "local") + knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) + + save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) + + def test_save_local_multidim_array(ctx_factory, debug=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx)