From 432c1060159eac7536a43f9594196a6c894bd22e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 28 Jul 2017 17:09:31 -0500 Subject: [PATCH 1/3] Make a way to spell literal local barriers --- loopy/codegen/control.py | 42 +++++++++++++++++++++++++++---------- loopy/kernel/creation.py | 3 +++ loopy/kernel/instruction.py | 4 ++-- test/test_loopy.py | 18 ++++++++++++++++ 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index d206faad5..789c00d33 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -1,8 +1,6 @@ """Loop nest build top-level control/hoisting.""" -from __future__ import division -from __future__ import absolute_import -import six +from __future__ import division, absolute_import __copyright__ = "Copyright (C) 2012 Andreas Kloeckner" @@ -26,12 +24,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - +import six from loopy.codegen.result import merge_codegen_results, wrap_in_if import islpy as isl from loopy.schedule import ( EnterLoop, LeaveLoop, RunInstruction, Barrier, CallKernel, gather_schedule_block, generate_sub_sched_items) +from loopy.diagnostic import LoopyError def get_admissible_conditional_inames_for(codegen_state, sched_index): @@ -150,15 +149,36 @@ def generate_code_for_sched_index(codegen_state, sched_index): return func(codegen_state, sched_index) elif isinstance(sched_item, Barrier): + # {{{ emit barrier code + + from loopy.codegen.result import CodeGenerationResult + if codegen_state.is_generating_device_code: - return codegen_state.ast_builder.emit_barrier( + barrier_ast = codegen_state.ast_builder.emit_barrier( sched_item.kind, sched_item.comment) - from loopy.codegen.result import CodeGenerationResult - return CodeGenerationResult( - host_program=None, - device_programs=[], - implemented_domains={}, - implemented_data_info=codegen_state.implemented_data_info) + if sched_item.originating_insn_id: 
+ return CodeGenerationResult.new( + codegen_state, + sched_item.originating_insn_id, + barrier_ast, + codegen_state.implemented_domain) + else: + return barrier_ast + else: + # host code + if sched_item.kind in ["global", "local"]: + # host code is assumed globally and locally synchronous + return CodeGenerationResult( + host_program=None, + device_programs=[], + implemented_domains={}, + implemented_data_info=codegen_state.implemented_data_info) + + else: + raise LoopyError("do not know how to emit code for barrier kind '%s' " + "in host code" % sched_item.kind) + + # }}} elif isinstance(sched_item, RunInstruction): insn = kernel.id_to_insn[sched_item.insn_id] diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 3bffc8546..89cb5f26a 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -582,6 +582,9 @@ def parse_special_insn(groups, insn_options): if special_insn_kind == "gbarrier": cls = BarrierInstruction kwargs["kind"] = "global" + elif special_insn_kind == "lbarrier": + cls = BarrierInstruction + kwargs["kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction else: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 08268ca9f..d5c388af6 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1312,11 +1312,12 @@ class BarrierInstruction(_DataObliviousInstruction): .. attribute:: kind - A string, currently only ``"global"``. + A string, ``"global"`` or ``"local"``. The textual syntax in a :mod:`loopy` kernel is:: ... gbarrier + ... 
lbarrier """ fields = _DataObliviousInstruction.fields | set(["kind"]) @@ -1328,7 +1329,6 @@ class BarrierInstruction(_DataObliviousInstruction): priority=None, boostable=None, boostable_into=None, predicates=None, tags=None, kind="global"): - assert kind == "global" if predicates: raise LoopyError("conditional barriers are not supported") diff --git a/test/test_loopy.py b/test/test_loopy.py index 78633abbd..49679ce5b 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1046,6 +1046,24 @@ def test_within_inames_and_reduction(): print(k.stringify(with_dependencies=True)) +def test_literal_local_barrier(ctx_factory): + ctx = ctx_factory() + + knl = lp.make_kernel( + "{ [i]: 0<=i Date: Fri, 28 Jul 2017 17:22:09 -0500 Subject: [PATCH 2/3] Make sure all Barrier schedule items have originating_insn_id --- loopy/schedule/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index f27ae49e8..29c208766 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1606,7 +1606,10 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): comment = "for %s (%s)" % ( dep.variable, dep.dep_descr.format( tgt=dep.target.id, src=dep.source.id)) - schedule.append(Barrier(comment=comment, kind=dep.var_kind)) + schedule.append(Barrier( + comment=comment, + kind=dep.var_kind, + originating_insn_id=None)) def insert_barriers(kernel, schedule, kind, verify_only, level=0): -- GitLab From ae1ef803230728a559d2cb6c0aacfa73020814e9 Mon Sep 17 00:00:00 2001 From: Matt Wala Date: Sat, 29 Jul 2017 20:30:57 -0500 Subject: [PATCH 3/3] Fix scheduler for incremental scheduling of literal local barriers. 
--- loopy/schedule/__init__.py | 15 +++++++-------- test/test_loopy.py | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 29c208766..4281e50bd 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -732,13 +732,15 @@ def generate_loop_schedules_internal( # }}} - # {{{ see if there are pending local barriers in the preschedule + # {{{ see if there are pending barriers in the preschedule - # Local barriers do not have associated instructions, so they need to - # be handled separately from instructions. + # Barriers that do not have an originating instruction are handled here. + # (These are automatically inserted by insert_barriers().) Barriers with + # originating instructions are handled as part of normal instruction + # scheduling below. if ( isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.kind == "local"): + and next_preschedule_item.originating_insn_id is None): for result in generate_loop_schedules_internal( sched_state.copy( schedule=sched_state.schedule + (next_preschedule_item,), @@ -814,10 +816,7 @@ def generate_loop_schedules_internal( if insn_id in sched_state.prescheduled_insn_ids: if isinstance(next_preschedule_item, RunInstruction): next_preschedule_insn_id = next_preschedule_item.insn_id - elif ( - isinstance(next_preschedule_item, Barrier) - and next_preschedule_item.kind == "global"): - assert hasattr(next_preschedule_item, "originating_insn_id") + elif isinstance(next_preschedule_item, Barrier): assert next_preschedule_item.originating_insn_id is not None next_preschedule_insn_id = next_preschedule_item.originating_insn_id else: diff --git a/test/test_loopy.py b/test/test_loopy.py index 49679ce5b..3593019ad 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1335,6 +1335,28 @@ def test_save_of_local_array(ctx_factory, debug=False): save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) 
+def test_save_of_local_array_with_explicit_local_barrier(ctx_factory, debug=False): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + knl = lp.make_kernel( + "{ [i,j]: 0<=i,j<8 }", + """ + for i, j + <>t[2*j] = j + ... lbarrier + t[2*j+1] = t[2*j] + ... gbarrier + out[i] = t[2*i] + end + """, seq_dependencies=True) + + knl = lp.set_temporary_scope(knl, "t", "local") + knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) + + save_and_reload_temporaries_test(queue, knl, np.arange(8), debug) + + def test_save_local_multidim_array(ctx_factory, debug=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab