From 53335155a51cc691d26d041713438622dfde5255 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 12:48:45 -0500 Subject: [PATCH 01/17] first attempt at enabling global memory barriers for OpenCL --- doc/tutorial.rst | 7 ++++--- loopy/codegen/control.py | 2 +- loopy/kernel/creation.py | 2 ++ loopy/kernel/instruction.py | 19 ++++++++++++++++--- loopy/kernel/tools.py | 2 ++ loopy/schedule/__init__.py | 14 +++++++++++--- loopy/target/__init__.py | 3 ++- loopy/target/cuda.py | 3 ++- loopy/target/ispc.py | 2 +- loopy/target/opencl.py | 6 ++++-- loopy/transform/add_barrier.py | 7 +++++-- loopy/transform/precompute.py | 3 ++- test/test_loopy.py | 22 ++++++++++++++++++++++ 13 files changed, 74 insertions(+), 18 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 8b8538725..69f895486 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1107,11 +1107,12 @@ work item: :mod:`loopy` supports two kinds of barriers: -* *Local barriers* ensure consistency of local memory accesses to items within +* *Local barriers* ensure consistency of memory accesses to items within *the same* work group. This synchronizes with all instructions in the work - group. + group. The type of memory (local or global) may be specified by the + :attr:`loopy.instruction.BarrierInstruction.mem_kind` -* *Global barriers* ensure consistency of global memory accesses +* *Global barriers* ensure consistency of memory accesses across *all* work groups, i.e. it synchronizes with every work item executing the kernel. Note that there is no exact equivalent for this kind of barrier in OpenCL. 
[#global-barrier-note]_ diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 524004233..ff5ed4888 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -156,7 +156,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): if codegen_state.is_generating_device_code: barrier_ast = codegen_state.ast_builder.emit_barrier( - sched_item.kind, sched_item.comment) + sched_item.kind, sched_item.mem_kind, sched_item.comment) if sched_item.originating_insn_id: return CodeGenerationResult.new( codegen_state, diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a50d797fe..ea3f186f2 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -584,6 +584,8 @@ def parse_special_insn(groups, insn_options): kwargs["kind"] = "global" elif special_insn_kind == "lbarrier": cls = BarrierInstruction + kwargs["mem_kind"] = "local" if "mem_kind" not in insn_options else \ + insn_options["mem_kind"] kwargs["kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index e6b093785..25bf47a2d 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1286,13 +1286,22 @@ class BarrierInstruction(_DataObliviousInstruction): A string, ``"global"`` or ``"local"``. + .. attribute:: mem_kind + + A string, ``"global"`` or ``"local"``. Chooses which memory type to + sychronize, for targets that require this (e.g. OpenCL) + The textual syntax in a :mod:`loopy` kernel is:: ... gbarrier ... lbarrier + + Note that the memory type :attr:`mem_kind` can be specified for local barriers:: + + ... 
lbarrier {mem_kind=global} """ - fields = _DataObliviousInstruction.fields | set(["kind"]) + fields = _DataObliviousInstruction.fields | set(["kind", "mem_kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -1300,7 +1309,7 @@ class BarrierInstruction(_DataObliviousInstruction): within_inames_is_final=None, within_inames=None, priority=None, boostable=None, boostable_into=None, - predicates=None, tags=None, kind="global"): + predicates=None, tags=None, kind="global", mem_kind="local"): if predicates: raise LoopyError("conditional barriers are not supported") @@ -1318,15 +1327,19 @@ class BarrierInstruction(_DataObliviousInstruction): boostable=boostable, boostable_into=boostable_into, predicates=predicates, - tags=tags, + tags=tags ) self.kind = kind + self.mem_kind = mem_kind def __str__(self): first_line = "%s: ... %sbarrier" % (self.id, self.kind[0]) options = self.get_str_options() + if self.kind == "local": + # add the memory kind + options += ['mem_kind={}'.format(self.mem_kind)] if options: first_line += " {%s}" % (": ".join(options)) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 02df0f2b4..eb1a1f689 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1502,6 +1502,8 @@ def stringify_instruction_list(kernel): if insn.no_sync_with: options.append("no_sync_with=%s" % ":".join( "%s@%s" % entry for entry in sorted(insn.no_sync_with))) + if isinstance(insn, lp.BarrierInstruction) and insn.kind == 'local': + options.append('mem_kind=%s' % insn.mem_kind) if lhs: core = "%s = %s" % ( diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index abf4d799f..aa8383146 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -88,10 +88,14 @@ class Barrier(ScheduleItem): ``"local"`` or ``"global"`` + .. attribute:: mem_kind + + ``"local"`` or ``"global"`` + .. 
attribute:: originating_insn_id """ - hash_fields = ["comment", "kind"] + hash_fields = ["comment", "kind", "mem_kind"] __slots__ = hash_fields + ["originating_insn_id"] # }}} @@ -436,9 +440,11 @@ def format_insn(kernel, insn_id): Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, format_insn_id(kernel, insn_id)) elif isinstance(insn, BarrierInstruction): - return "[%s] %s... %sbarrier%s" % ( + mem_kind = '{mem_kind=%s}' % insn.mem_kind if insn.kind[0] == 'local' \ + else '' + return "[%s] %s... %sbarrier%s%s" % ( format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.kind[0], Style.RESET_ALL) + Fore.MAGENTA, insn.kind[0], mem_kind, Style.RESET_ALL) elif isinstance(insn, NoOpInstruction): return "[%s] %s... nop%s" % ( format_insn_id(kernel, insn_id), @@ -1319,6 +1325,7 @@ def convert_barrier_instructions_to_barriers(kernel, schedule): if isinstance(insn, BarrierInstruction): result.append(Barrier( kind=insn.kind, + mem_kind=insn.kind, originating_insn_id=insn.id, comment="Barrier inserted due to %s" % insn.id)) continue @@ -1608,6 +1615,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): schedule.append(Barrier( comment=comment, kind=dep.var_kind, + mem_kind=dep.mem_kind, originating_insn_id=None)) diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 5800a0236..70aa911f1 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -189,9 +189,10 @@ class ASTBuilderBase(object): def add_vector_access(self, access_expr, index): raise NotImplementedError() - def emit_barrier(self, kind, comment): + def emit_barrier(self, kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` + :arg mem_kind: ``"local"`` or ``"global"`` """ raise NotImplementedError() diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2bdffb5aa..d8698191d 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -291,9 +291,10 @@ class CUDACASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return 
access_expr.a(self._VEC_AXES[index]) - def emit_barrier(self, kind, comment): + def emit_barrier(self, kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` + :arg memkind: unused :return: a :class:`loopy.codegen.GeneratedInstruction`. """ if kind == "local": diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 35dade904..0807acaf2 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -288,7 +288,7 @@ class ISPCASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr[index] - def emit_barrier(self, kind, comment): + def emit_barrier(self, kind, mem_kind, comment): from cgen import Comment, Statement assert comment diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index a5f7562c4..f16bb5c1c 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -450,7 +450,7 @@ class OpenCLCASTBuilder(CASTBuilder): # The 'int' avoids an 'L' suffix for long ints. return access_expr.attr("s%s" % hex(int(index))[2:]) - def emit_barrier(self, kind, comment): + def emit_barrier(self, kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :return: a :class:`loopy.codegen.GeneratedInstruction`. 
@@ -459,8 +459,10 @@ class OpenCLCASTBuilder(CASTBuilder): if comment: comment = " /* %s */" % comment + mem_kind = mem_kind.upper() + from cgen import Statement - return Statement("barrier(CLK_LOCAL_MEM_FENCE)%s" % comment) + return Statement("barrier(CLK_%s_MEM_FENCE)%s" % (mem_kind, comment)) elif kind == "global": raise LoopyError("OpenCL does not have global barriers") else: diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 00c99edce..9d61f95d1 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -37,7 +37,7 @@ __doc__ = """ # {{{ add_barrier def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, - tags=None, kind="global"): + tags=None, kind="global", mem_kind="local"): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -50,6 +50,8 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, :arg id: String on which the id of the barrier would be based on. :arg tags: The tag of the group to which the barrier must be added :arg kind: Kind of barrier to be added. May be "global" or "local". + :arg kind: Type of memory to be synchronied. May be "global" or "local". 
Ignored + for "global" bariers """ if id_based_on is None: @@ -65,7 +67,8 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, depends_on_is_final=True, id=id, tags=tags, - kind=kind) + kind=kind, + mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) new_knl = add_dependency(kernel=new_knl, diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 6077332c4..47d0a01da 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -811,7 +811,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, barrier_insn = BarrierInstruction( id=barrier_insn_id, depends_on=frozenset([compute_insn_id]), - kind="global") + kind="global", + mem_kind="global") compute_dep_id = barrier_insn_id added_compute_insns.append(barrier_insn) diff --git a/test/test_loopy.py b/test/test_loopy.py index d0398f216..1aba532a1 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1090,6 +1090,28 @@ def test_literal_local_barrier(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) +def test_local_barrier_mem_kind(): + def __test_type(mtype, expected): + insn = '... lbarrier' + if mtype: + insn += '{mem_kind=%s}' % mtype + knl = lp.make_kernel( + "{ [i]: 0<=i Date: Tue, 14 Nov 2017 12:53:28 -0500 Subject: [PATCH 02/17] fix insn_options parsing --- loopy/kernel/creation.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index ea3f186f2..a7723955d 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -352,6 +352,14 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): % v) del assignee_name + elif opt_key == "mem_kind": + opt_value = opt_value.lower().strip() + if opt_value not in ['local', 'global']: + raise LoopyError("Unknown memory synchronization type %s specified" + " expected, 'local' or 'global'." 
+ % opt_value) + result["mem_kind"] = opt_value + else: raise ValueError( "unrecognized instruction option '%s' " -- GitLab From e82d68f0f6094a62c52c7159e3e242e21c7410ae Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 13:03:14 -0500 Subject: [PATCH 03/17] no need, either defaults to local or is supplied by user --- loopy/kernel/creation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a7723955d..cced5b93d 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -592,8 +592,6 @@ def parse_special_insn(groups, insn_options): kwargs["kind"] = "global" elif special_insn_kind == "lbarrier": cls = BarrierInstruction - kwargs["mem_kind"] = "local" if "mem_kind" not in insn_options else \ - insn_options["mem_kind"] kwargs["kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction -- GitLab From fbb50dc170f17e068cad7d2acf1edc1c743990bd Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 13:03:20 -0500 Subject: [PATCH 04/17] fix --- loopy/schedule/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index aa8383146..5120aec32 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1325,7 +1325,7 @@ def convert_barrier_instructions_to_barriers(kernel, schedule): if isinstance(insn, BarrierInstruction): result.append(Barrier( kind=insn.kind, - mem_kind=insn.kind, + mem_kind=insn.mem_kind, originating_insn_id=insn.id, comment="Barrier inserted due to %s" % insn.id)) continue -- GitLab From 0501d95475abc0dcdd10fb735a9bb842e01a657f Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 13:16:04 -0500 Subject: [PATCH 05/17] pull memory kind from variable type --- loopy/schedule/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 5120aec32..b74d3473d 100644 --- 
a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1615,7 +1615,7 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): schedule.append(Barrier( comment=comment, kind=dep.var_kind, - mem_kind=dep.mem_kind, + mem_kind=dep.var_kind, originating_insn_id=None)) -- GitLab From bab11fb959eeddc6294d76e248d3720a1663b410 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:21:54 -0500 Subject: [PATCH 06/17] s/kind/sychronization_kind --- loopy/codegen/control.py | 10 ++++--- loopy/kernel/creation.py | 4 +-- loopy/kernel/instruction.py | 22 +++++++++----- loopy/kernel/tools.py | 11 ++++--- loopy/schedule/__init__.py | 49 +++++++++++++++++++------------- loopy/schedule/device_mapping.py | 2 +- loopy/statistics.py | 3 +- loopy/target/__init__.py | 4 +-- loopy/target/cuda.py | 6 ++-- loopy/target/ispc.py | 6 ++-- loopy/target/opencl.py | 6 ++-- loopy/transform/add_barrier.py | 9 +++--- loopy/transform/precompute.py | 2 +- loopy/transform/save.py | 2 +- 14 files changed, 80 insertions(+), 56 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index ff5ed4888..40526b890 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -156,7 +156,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): if codegen_state.is_generating_device_code: barrier_ast = codegen_state.ast_builder.emit_barrier( - sched_item.kind, sched_item.mem_kind, sched_item.comment) + sched_item.sychronization_kind, sched_item.mem_kind, + sched_item.comment) if sched_item.originating_insn_id: return CodeGenerationResult.new( codegen_state, @@ -167,7 +168,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): return barrier_ast else: # host code - if sched_item.kind in ["global", "local"]: + if sched_item.sychronization_kind in ["global", "local"]: # host code is assumed globally and locally synchronous return CodeGenerationResult( host_program=None, @@ -176,8 +177,9 @@ def 
generate_code_for_sched_index(codegen_state, sched_index): implemented_data_info=codegen_state.implemented_data_info) else: - raise LoopyError("do not know how to emit code for barrier kind '%s'" - "in host code" % sched_item.kind) + raise LoopyError("do not know how to emit code for barrier " + "sychronization kind '%s'" "in host code" + % sched_item.sychronization_kind) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index cced5b93d..ce17621b5 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -589,10 +589,10 @@ def parse_special_insn(groups, insn_options): if special_insn_kind == "gbarrier": cls = BarrierInstruction - kwargs["kind"] = "global" + kwargs["sychronization_kind"] = "global" elif special_insn_kind == "lbarrier": cls = BarrierInstruction - kwargs["kind"] = "local" + kwargs["sychronization_kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction else: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 25bf47a2d..5df565c5a 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1280,9 +1280,9 @@ class NoOpInstruction(_DataObliviousInstruction): class BarrierInstruction(_DataObliviousInstruction): """An instruction that requires synchronization with all - concurrent work items of :attr:`kind`. + concurrent work items of :attr:`sychronization_kind`. - .. attribute:: kind + .. attribute:: sychronization_kind A string, ``"global"`` or ``"local"``. @@ -1301,7 +1301,8 @@ class BarrierInstruction(_DataObliviousInstruction): ... 
lbarrier {mem_kind=global} """ - fields = _DataObliviousInstruction.fields | set(["kind", "mem_kind"]) + fields = _DataObliviousInstruction.fields | set(["sychronization_kind", + "mem_kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -1309,7 +1310,8 @@ class BarrierInstruction(_DataObliviousInstruction): within_inames_is_final=None, within_inames=None, priority=None, boostable=None, boostable_into=None, - predicates=None, tags=None, kind="global", mem_kind="local"): + predicates=None, tags=None, sychronization_kind="global", + mem_kind="local"): if predicates: raise LoopyError("conditional barriers are not supported") @@ -1330,14 +1332,14 @@ class BarrierInstruction(_DataObliviousInstruction): tags=tags ) - self.kind = kind + self.sychronization_kind = sychronization_kind self.mem_kind = mem_kind def __str__(self): - first_line = "%s: ... %sbarrier" % (self.id, self.kind[0]) + first_line = "%s: ... %sbarrier" % (self.id, self.sychronization_kind[0]) options = self.get_str_options() - if self.kind == "local": + if self.sychronization_kind == "local": # add the memory kind options += ['mem_kind={}'.format(self.mem_kind)] if options: @@ -1345,6 +1347,12 @@ class BarrierInstruction(_DataObliviousInstruction): return first_line + @property + def kind(self): + from warnings import warn + warn("BarrierInstruction.kind is deprecated, use sychronization_kind " + "instead", DeprecationWarning, stacklevel=2) + # }}} diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index eb1a1f689..b98f46998 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1472,7 +1472,7 @@ def stringify_instruction_list(kernel): trailing = [l for l in insn.code.split("\n")] elif isinstance(insn, lp.BarrierInstruction): lhs = "" - rhs = "... %sbarrier" % insn.kind[0] + rhs = "... 
%sbarrier" % insn.sychronization_kind[0] trailing = [] elif isinstance(insn, lp.NoOpInstruction): @@ -1502,7 +1502,8 @@ def stringify_instruction_list(kernel): if insn.no_sync_with: options.append("no_sync_with=%s" % ":".join( "%s@%s" % entry for entry in sorted(insn.no_sync_with))) - if isinstance(insn, lp.BarrierInstruction) and insn.kind == 'local': + if isinstance(insn, lp.BarrierInstruction) and \ + insn.sychronization_kind == 'local': options.append('mem_kind=%s' % insn.mem_kind) if lhs: @@ -1556,7 +1557,8 @@ def get_global_barrier_order(kernel): def is_barrier(my_insn_id): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + return isinstance(insn, BarrierInstruction) and \ + insn.sychronization_kind == "global" while unvisited: stack = [unvisited.pop()] @@ -1649,7 +1651,8 @@ def find_most_recent_global_barrier(kernel, insn_id): def is_barrier(my_insn_id): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + return isinstance(insn, BarrierInstruction) and \ + insn.sychronization_kind == "global" global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index b74d3473d..7ba74770c 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -84,7 +84,7 @@ class Barrier(ScheduleItem): A plain-text comment explaining why the barrier was inserted. - .. attribute:: kind + .. attribute:: sychronization_kind ``"local"`` or ``"global"`` @@ -95,7 +95,7 @@ class Barrier(ScheduleItem): .. 
attribute:: originating_insn_id """ - hash_fields = ["comment", "kind", "mem_kind"] + hash_fields = ["comment", "sychronization_kind", "mem_kind"] __slots__ = hash_fields + ["originating_insn_id"] # }}} @@ -440,11 +440,13 @@ def format_insn(kernel, insn_id): Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, format_insn_id(kernel, insn_id)) elif isinstance(insn, BarrierInstruction): - mem_kind = '{mem_kind=%s}' % insn.mem_kind if insn.kind[0] == 'local' \ - else '' + mem_kind = '' + if insn.sychronization_kind == 'local': + mem_kind = '{mem_kind=%s}' % insn.mem_kind + return "[%s] %s... %sbarrier%s%s" % ( format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.kind[0], mem_kind, Style.RESET_ALL) + Fore.MAGENTA, insn.sychronization_kind[0], mem_kind, Style.RESET_ALL) elif isinstance(insn, NoOpInstruction): return "[%s] %s... nop%s" % ( format_insn_id(kernel, insn_id), @@ -485,7 +487,8 @@ def dump_schedule(kernel, schedule): insn_str = sched_item.insn_id lines.append(indent + insn_str) elif isinstance(sched_item, Barrier): - lines.append(indent + "... %sbarrier" % sched_item.kind[0]) + lines.append(indent + "... 
%sbarrier" % + sched_item.sychronization_kind[0]) else: assert False @@ -839,7 +842,8 @@ def generate_loop_schedules_internal( # {{{ check if scheduler state allows insn scheduling from loopy.kernel.instruction import BarrierInstruction - if isinstance(insn, BarrierInstruction) and insn.kind == "global": + if isinstance(insn, BarrierInstruction) and \ + insn.sychronization_kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: print("can't schedule '%s' because global barriers are " @@ -1324,7 +1328,7 @@ def convert_barrier_instructions_to_barriers(kernel, schedule): insn = kernel.id_to_insn[sched_item.insn_id] if isinstance(insn, BarrierInstruction): result.append(Barrier( - kind=insn.kind, + sychronization_kind=insn.sychronization_kind, mem_kind=insn.mem_kind, originating_insn_id=insn.id, comment="Barrier inserted due to %s" % insn.id)) @@ -1584,7 +1588,8 @@ def _insn_ids_reaching_end(schedule, kind, reverse): # end # barrier() # end - if barrier_kind_more_or_equally_global(sched_item.kind, kind): + if barrier_kind_more_or_equally_global( + sched_item.sychronization_kind, kind): insn_ids_alive_at_scope[-1].clear() else: insn_ids_alive_at_scope[-1] |= set( @@ -1614,16 +1619,17 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): tgt=dep.target.id, src=dep.source.id)) schedule.append(Barrier( comment=comment, - kind=dep.var_kind, + sychronization_kind=dep.var_kind, mem_kind=dep.var_kind, originating_insn_id=None)) -def insert_barriers(kernel, schedule, kind, verify_only, level=0): +def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0): """ - :arg kind: "local" or "global". The :attr:`Barrier.kind` to be inserted. - Generally, this function will be called once for each kind of barrier - at the top level, where more global barriers should be inserted first. + :arg sychronization_kind: "local" or "global". + The :attr:`Barrier.sychronization_kind` to be inserted. 
Generally, this + function will be called once for each kind of barrier at the top level, where + more global barriers should be inserted first. :arg verify_only: do not insert barriers, only complain if they are missing. :arg level: the current level of loop nesting, 0 for outermost. @@ -1632,14 +1638,15 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): # {{{ insert barriers at outermost scheduling level def insert_barriers_at_outer_level(schedule, reverse=False): - dep_tracker = DependencyTracker(kernel, var_kind=kind, reverse=reverse) + dep_tracker = DependencyTracker(kernel, var_kind=sychronization_kind, + reverse=reverse) if reverse: # Populate the dependency tracker with sources from the tail end of # the schedule block. for insn_id in ( insn_ids_reaching_end_without_intervening_barrier( - schedule, kind)): + schedule, sychronization_kind)): dep_tracker.add_source(insn_id) result = [] @@ -1653,11 +1660,11 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( - subloop, kind)) + subloop, sychronization_kind)) loop_tail = ( insn_ids_reaching_end_without_intervening_barrier( - subloop, kind)) + subloop, sychronization_kind)) # Checks if a barrier is needed before the loop. 
This handles # dependencies with targets that can be reached without an @@ -1696,7 +1703,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): elif isinstance(sched_item, Barrier): result.append(sched_item) - if barrier_kind_more_or_equally_global(sched_item.kind, kind): + if barrier_kind_more_or_equally_global( + sched_item.sychronization_kind, sychronization_kind): dep_tracker.discard_all_sources() i += 1 @@ -1732,7 +1740,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): if isinstance(sched_item, EnterLoop): subloop, new_i = gather_schedule_block(schedule, i) new_subloop = insert_barriers( - kernel, subloop[1:-1], kind, verify_only, level + 1) + kernel, subloop[1:-1], sychronization_kind, verify_only, + level + 1) result.append(subloop[0]) result.extend(new_subloop) result.append(subloop[-1]) diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 1a0789c2f..f9c78acba 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -106,7 +106,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): [end_item]) elif isinstance(sched_item, Barrier): - if sched_item.kind == "global": + if sched_item.sychronization_kind == "global": # Wrap the current chunk into a kernel call. 
schedule_required_splitting = True if current_chunk: diff --git a/loopy/statistics.py b/loopy/statistics.py index 88d7ec328..72d0c6c7d 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1417,7 +1417,8 @@ def get_synchronization_map(knl): iname_list.pop() elif isinstance(sched_item, Barrier): - result = result + ToCountMap({"barrier_%s" % sched_item.kind: + result = result + ToCountMap({"barrier_%s" % + sched_item.synchronization_kind: get_count_poly(iname_list)}) elif isinstance(sched_item, CallKernel): diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 70aa911f1..88def69e2 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -189,9 +189,9 @@ class ASTBuilderBase(object): def add_vector_access(self, access_expr, index): raise NotImplementedError() - def emit_barrier(self, kind, mem_kind, comment): + def emit_barrier(self, sychronization_kind, mem_kind, comment): """ - :arg kind: ``"local"`` or ``"global"`` + :arg sychronization_kind: ``"local"`` or ``"global"`` :arg mem_kind: ``"local"`` or ``"global"`` """ raise NotImplementedError() diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index d8698191d..b1ae8427b 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -291,19 +291,19 @@ class CUDACASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr.a(self._VEC_AXES[index]) - def emit_barrier(self, kind, mem_kind, comment): + def emit_barrier(self, sychronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :arg memkind: unused :return: a :class:`loopy.codegen.GeneratedInstruction`. 
""" - if kind == "local": + if sychronization_kind == "local": if comment: comment = " /* %s */" % comment from cgen import Statement return Statement("__syncthreads()%s" % comment) - elif kind == "global": + elif sychronization_kind == "global": raise LoopyError("CUDA does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 0807acaf2..2b2196236 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -288,15 +288,15 @@ class ISPCASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr[index] - def emit_barrier(self, kind, mem_kind, comment): + def emit_barrier(self, sychronization_kind, mem_kind, comment): from cgen import Comment, Statement assert comment - if kind == "local": + if sychronization_kind == "local": return Comment("local barrier: %s" % comment) - elif kind == "global": + elif sychronization_kind == "global": return Statement("sync; /* %s */" % comment) else: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index f16bb5c1c..d7b570f20 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -450,12 +450,12 @@ class OpenCLCASTBuilder(CASTBuilder): # The 'int' avoids an 'L' suffix for long ints. return access_expr.attr("s%s" % hex(int(index))[2:]) - def emit_barrier(self, kind, mem_kind, comment): + def emit_barrier(self, sychronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :return: a :class:`loopy.codegen.GeneratedInstruction`. 
""" - if kind == "local": + if sychronization_kind == "local": if comment: comment = " /* %s */" % comment @@ -463,7 +463,7 @@ class OpenCLCASTBuilder(CASTBuilder): from cgen import Statement return Statement("barrier(CLK_%s_MEM_FENCE)%s" % (mem_kind, comment)) - elif kind == "global": + elif sychronization_kind == "global": raise LoopyError("OpenCL does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 9d61f95d1..9ed90a028 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -37,7 +37,7 @@ __doc__ = """ # {{{ add_barrier def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, - tags=None, kind="global", mem_kind="local"): + tags=None, sychronization_kind="global", mem_kind="local"): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -49,13 +49,14 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, the barrier which is to be added :arg id: String on which the id of the barrier would be based on. :arg tags: The tag of the group to which the barrier must be added - :arg kind: Kind of barrier to be added. May be "global" or "local". + :arg sychronization_kind: Kind of barrier to be added. May be "global" or "local" :arg kind: Type of memory to be synchronied. May be "global" or "local". 
Ignored for "global" bariers """ if id_based_on is None: - id = knl.make_unique_instruction_id(based_on=kind[0]+"_barrier") + id = knl.make_unique_instruction_id( + based_on=sychronization_kind[0]+"_barrier") else: id = knl.make_unique_instruction_id(based_on=id_based_on) @@ -67,7 +68,7 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, depends_on_is_final=True, id=id, tags=tags, - kind=kind, + kind=sychronization_kind, mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 47d0a01da..6cb5ed718 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -811,7 +811,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, barrier_insn = BarrierInstruction( id=barrier_insn_id, depends_on=frozenset([compute_insn_id]), - kind="global", + sychronization_kind="global", mem_kind="global") compute_dep_id = barrier_insn_id diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0..97eff0a0d 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -351,7 +351,7 @@ class TemporarySaver(object): self.subkernel_to_slice_indices[subkernel]) def is_global_barrier(item): - return isinstance(item, Barrier) and item.kind == "global" + return isinstance(item, Barrier) and item.sychronization_kind == "global" try: pre_barrier = next(item for item in -- GitLab From 88e7c122832005da482f6e6e7ebebc1098ec6b88 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:24:13 -0500 Subject: [PATCH 07/17] fix default --- loopy/transform/add_barrier.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 9ed90a028..9c2334ed3 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -37,7 +37,7 @@ __doc__ = """ # {{{ add_barrier def add_barrier(knl, insn_before="", 
insn_after="", id_based_on=None, - tags=None, sychronization_kind="global", mem_kind="local"): + tags=None, sychronization_kind="global", mem_kind=None): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -51,9 +51,12 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, :arg tags: The tag of the group to which the barrier must be added :arg sychronization_kind: Kind of barrier to be added. May be "global" or "local" :arg kind: Type of memory to be synchronied. May be "global" or "local". Ignored - for "global" bariers + for "global" bariers. If not supplied, defaults to :arg:`sychronization_kind` """ + if mem_kind is None: + mem_kind = sychronization_kind + if id_based_on is None: id = knl.make_unique_instruction_id( based_on=sychronization_kind[0]+"_barrier") -- GitLab From 70b69072705bdf589e6e23f817281194c5c39012 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:32:02 -0500 Subject: [PATCH 08/17] raise if supplied for non-barrier --- loopy/kernel/creation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index ce17621b5..95064e606 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -172,7 +172,8 @@ from collections import namedtuple _NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") -def parse_insn_options(opt_dict, options_str, assignee_names=None): +def parse_insn_options(opt_dict, options_str, assignee_names=None, + insn_kind=None): if options_str is None: return opt_dict @@ -353,6 +354,9 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): del assignee_name elif opt_key == "mem_kind": + if not insn_kind in ['gbarrier', 'lbarrier']: + raise LoopyError("Cannot supply memory synchronization type to " + "non-barrier 
instruction %s" % insn_kind) opt_value = opt_value.lower().strip() if opt_value not in ['local', 'global']: raise LoopyError("Unknown memory synchronization type %s specified" @@ -570,7 +574,8 @@ def parse_special_insn(groups, insn_options): insn_options = parse_insn_options( insn_options.copy(), groups["options"], - assignee_names=()) + assignee_names=(), + kind=groups['kind']) del insn_options["atomicity"] -- GitLab From 41321e2e00d01bb04454fde191b0c4d99ab743b7 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:35:55 -0500 Subject: [PATCH 09/17] flake --- loopy/kernel/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 95064e606..fe3b82437 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -354,7 +354,7 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None, del assignee_name elif opt_key == "mem_kind": - if not insn_kind in ['gbarrier', 'lbarrier']: + if insn_kind not in ['gbarrier', 'lbarrier']: raise LoopyError("Cannot supply memory synchronization type to " "non-barrier instruction %s" % insn_kind) opt_value = opt_value.lower().strip() -- GitLab From 3175d52ed9046f1d2e72973db11cc9b693783394 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:38:54 -0500 Subject: [PATCH 10/17] fix --- loopy/schedule/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 7ba74770c..73f17e89c 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1907,11 +1907,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): if not kernel.options.disable_global_barriers: logger.debug("%s: barrier insertion: global" % kernel.name) gen_sched = insert_barriers(kernel, gen_sched, - kind="global", verify_only=True) + sychronization_kind="global", verify_only=True) logger.debug("%s: barrier insertion: local" % kernel.name) - gen_sched = 
insert_barriers(kernel, gen_sched, kind="local", - verify_only=False) + gen_sched = insert_barriers(kernel, gen_sched, + sychronization_kind="local", verify_only=False) logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( -- GitLab From 22ce888bd02455d7ec23c21256ad64fa6f61961b Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 14:47:22 -0500 Subject: [PATCH 11/17] fix --- loopy/kernel/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index fe3b82437..3664fe80f 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -575,7 +575,7 @@ def parse_special_insn(groups, insn_options): insn_options.copy(), groups["options"], assignee_names=(), - kind=groups['kind']) + insn_kind=groups['kind']) del insn_options["atomicity"] -- GitLab From 316cb81fa2839457fdb992783c9ae34988ca18c3 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 15:00:11 -0500 Subject: [PATCH 12/17] Typo --- loopy/codegen/control.py | 8 +++---- loopy/kernel/creation.py | 4 ++-- loopy/kernel/instruction.py | 16 ++++++------- loopy/kernel/tools.py | 8 +++---- loopy/schedule/__init__.py | 40 ++++++++++++++++---------------- loopy/schedule/device_mapping.py | 2 +- loopy/target/__init__.py | 4 ++-- loopy/target/cuda.py | 6 ++--- loopy/target/ispc.py | 6 ++--- loopy/target/opencl.py | 6 ++--- loopy/transform/add_barrier.py | 12 +++++----- loopy/transform/precompute.py | 2 +- loopy/transform/save.py | 2 +- 13 files changed, 58 insertions(+), 58 deletions(-) diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 40526b890..e3e209726 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -156,7 +156,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): if codegen_state.is_generating_device_code: barrier_ast = codegen_state.ast_builder.emit_barrier( - sched_item.sychronization_kind, sched_item.mem_kind, + 
sched_item.synchronization_kind, sched_item.mem_kind, sched_item.comment) if sched_item.originating_insn_id: return CodeGenerationResult.new( @@ -168,7 +168,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): return barrier_ast else: # host code - if sched_item.sychronization_kind in ["global", "local"]: + if sched_item.synchronization_kind in ["global", "local"]: # host code is assumed globally and locally synchronous return CodeGenerationResult( host_program=None, @@ -178,8 +178,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): else: raise LoopyError("do not know how to emit code for barrier " - "sychronization kind '%s'" "in host code" - % sched_item.sychronization_kind) + "synchronization kind '%s'" "in host code" + % sched_item.synchronization_kind) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 3664fe80f..2c15b4789 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -594,10 +594,10 @@ def parse_special_insn(groups, insn_options): if special_insn_kind == "gbarrier": cls = BarrierInstruction - kwargs["sychronization_kind"] = "global" + kwargs["synchronization_kind"] = "global" elif special_insn_kind == "lbarrier": cls = BarrierInstruction - kwargs["sychronization_kind"] = "local" + kwargs["synchronization_kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction else: diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 5df565c5a..9005d40de 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1280,9 +1280,9 @@ class NoOpInstruction(_DataObliviousInstruction): class BarrierInstruction(_DataObliviousInstruction): """An instruction that requires synchronization with all - concurrent work items of :attr:`sychronization_kind`. + concurrent work items of :attr:`synchronization_kind`. - .. attribute:: sychronization_kind + .. attribute:: synchronization_kind A string, ``"global"`` or ``"local"``. 
@@ -1301,7 +1301,7 @@ class BarrierInstruction(_DataObliviousInstruction): ... lbarrier {mem_kind=global} """ - fields = _DataObliviousInstruction.fields | set(["sychronization_kind", + fields = _DataObliviousInstruction.fields | set(["synchronization_kind", "mem_kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, @@ -1310,7 +1310,7 @@ class BarrierInstruction(_DataObliviousInstruction): within_inames_is_final=None, within_inames=None, priority=None, boostable=None, boostable_into=None, - predicates=None, tags=None, sychronization_kind="global", + predicates=None, tags=None, synchronization_kind="global", mem_kind="local"): if predicates: @@ -1332,14 +1332,14 @@ class BarrierInstruction(_DataObliviousInstruction): tags=tags ) - self.sychronization_kind = sychronization_kind + self.synchronization_kind = synchronization_kind self.mem_kind = mem_kind def __str__(self): - first_line = "%s: ... %sbarrier" % (self.id, self.sychronization_kind[0]) + first_line = "%s: ... %sbarrier" % (self.id, self.synchronization_kind[0]) options = self.get_str_options() - if self.sychronization_kind == "local": + if self.synchronization_kind == "local": # add the memory kind options += ['mem_kind={}'.format(self.mem_kind)] if options: @@ -1350,7 +1350,7 @@ class BarrierInstruction(_DataObliviousInstruction): @property def kind(self): from warnings import warn - warn("BarrierInstruction.kind is deprecated, use sychronization_kind " + warn("BarrierInstruction.kind is deprecated, use synchronization_kind " "instead", DeprecationWarning, stacklevel=2) # }}} diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index b98f46998..a65e7fb4c 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1472,7 +1472,7 @@ def stringify_instruction_list(kernel): trailing = [l for l in insn.code.split("\n")] elif isinstance(insn, lp.BarrierInstruction): lhs = "" - rhs = "... %sbarrier" % insn.sychronization_kind[0] + rhs = "... 
%sbarrier" % insn.synchronization_kind[0] trailing = [] elif isinstance(insn, lp.NoOpInstruction): @@ -1503,7 +1503,7 @@ def stringify_instruction_list(kernel): options.append("no_sync_with=%s" % ":".join( "%s@%s" % entry for entry in sorted(insn.no_sync_with))) if isinstance(insn, lp.BarrierInstruction) and \ - insn.sychronization_kind == 'local': + insn.synchronization_kind == 'local': options.append('mem_kind=%s' % insn.mem_kind) if lhs: @@ -1558,7 +1558,7 @@ def get_global_barrier_order(kernel): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction return isinstance(insn, BarrierInstruction) and \ - insn.sychronization_kind == "global" + insn.synchronization_kind == "global" while unvisited: stack = [unvisited.pop()] @@ -1652,7 +1652,7 @@ def find_most_recent_global_barrier(kernel, insn_id): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction return isinstance(insn, BarrierInstruction) and \ - insn.sychronization_kind == "global" + insn.synchronization_kind == "global" global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 73f17e89c..4d7a9ee32 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -84,7 +84,7 @@ class Barrier(ScheduleItem): A plain-text comment explaining why the barrier was inserted. - .. attribute:: sychronization_kind + .. attribute:: synchronization_kind ``"local"`` or ``"global"`` @@ -95,7 +95,7 @@ class Barrier(ScheduleItem): .. 
attribute:: originating_insn_id """ - hash_fields = ["comment", "sychronization_kind", "mem_kind"] + hash_fields = ["comment", "synchronization_kind", "mem_kind"] __slots__ = hash_fields + ["originating_insn_id"] # }}} @@ -441,12 +441,12 @@ def format_insn(kernel, insn_id): format_insn_id(kernel, insn_id)) elif isinstance(insn, BarrierInstruction): mem_kind = '' - if insn.sychronization_kind == 'local': + if insn.synchronization_kind == 'local': mem_kind = '{mem_kind=%s}' % insn.mem_kind return "[%s] %s... %sbarrier%s%s" % ( format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.sychronization_kind[0], mem_kind, Style.RESET_ALL) + Fore.MAGENTA, insn.synchronization_kind[0], mem_kind, Style.RESET_ALL) elif isinstance(insn, NoOpInstruction): return "[%s] %s... nop%s" % ( format_insn_id(kernel, insn_id), @@ -488,7 +488,7 @@ def dump_schedule(kernel, schedule): lines.append(indent + insn_str) elif isinstance(sched_item, Barrier): lines.append(indent + "... %sbarrier" % - sched_item.sychronization_kind[0]) + sched_item.synchronization_kind[0]) else: assert False @@ -843,7 +843,7 @@ def generate_loop_schedules_internal( from loopy.kernel.instruction import BarrierInstruction if isinstance(insn, BarrierInstruction) and \ - insn.sychronization_kind == "global": + insn.synchronization_kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: print("can't schedule '%s' because global barriers are " @@ -1328,7 +1328,7 @@ def convert_barrier_instructions_to_barriers(kernel, schedule): insn = kernel.id_to_insn[sched_item.insn_id] if isinstance(insn, BarrierInstruction): result.append(Barrier( - sychronization_kind=insn.sychronization_kind, + synchronization_kind=insn.synchronization_kind, mem_kind=insn.mem_kind, originating_insn_id=insn.id, comment="Barrier inserted due to %s" % insn.id)) @@ -1589,7 +1589,7 @@ def _insn_ids_reaching_end(schedule, kind, reverse): # barrier() # end if barrier_kind_more_or_equally_global( - sched_item.sychronization_kind, 
kind): + sched_item.synchronization_kind, kind): insn_ids_alive_at_scope[-1].clear() else: insn_ids_alive_at_scope[-1] |= set( @@ -1619,15 +1619,15 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): tgt=dep.target.id, src=dep.source.id)) schedule.append(Barrier( comment=comment, - sychronization_kind=dep.var_kind, + synchronization_kind=dep.var_kind, mem_kind=dep.var_kind, originating_insn_id=None)) -def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0): +def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0): """ - :arg sychronization_kind: "local" or "global". - The :attr:`Barrier.sychronization_kind` to be inserted. Generally, this + :arg synchronization_kind: "local" or "global". + The :attr:`Barrier.synchronization_kind` to be inserted. Generally, this function will be called once for each kind of barrier at the top level, where more global barriers should be inserted first. :arg verify_only: do not insert barriers, only complain if they are @@ -1638,7 +1638,7 @@ def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0) # {{{ insert barriers at outermost scheduling level def insert_barriers_at_outer_level(schedule, reverse=False): - dep_tracker = DependencyTracker(kernel, var_kind=sychronization_kind, + dep_tracker = DependencyTracker(kernel, var_kind=synchronization_kind, reverse=reverse) if reverse: @@ -1646,7 +1646,7 @@ def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0) # the schedule block. 
for insn_id in ( insn_ids_reaching_end_without_intervening_barrier( - schedule, sychronization_kind)): + schedule, synchronization_kind)): dep_tracker.add_source(insn_id) result = [] @@ -1660,11 +1660,11 @@ def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0) loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( - subloop, sychronization_kind)) + subloop, synchronization_kind)) loop_tail = ( insn_ids_reaching_end_without_intervening_barrier( - subloop, sychronization_kind)) + subloop, synchronization_kind)) # Checks if a barrier is needed before the loop. This handles # dependencies with targets that can be reached without an @@ -1704,7 +1704,7 @@ def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0) elif isinstance(sched_item, Barrier): result.append(sched_item) if barrier_kind_more_or_equally_global( - sched_item.sychronization_kind, sychronization_kind): + sched_item.synchronization_kind, synchronization_kind): dep_tracker.discard_all_sources() i += 1 @@ -1740,7 +1740,7 @@ def insert_barriers(kernel, schedule, sychronization_kind, verify_only, level=0) if isinstance(sched_item, EnterLoop): subloop, new_i = gather_schedule_block(schedule, i) new_subloop = insert_barriers( - kernel, subloop[1:-1], sychronization_kind, verify_only, + kernel, subloop[1:-1], synchronization_kind, verify_only, level + 1) result.append(subloop[0]) result.extend(new_subloop) @@ -1907,11 +1907,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): if not kernel.options.disable_global_barriers: logger.debug("%s: barrier insertion: global" % kernel.name) gen_sched = insert_barriers(kernel, gen_sched, - sychronization_kind="global", verify_only=True) + synchronization_kind="global", verify_only=True) logger.debug("%s: barrier insertion: local" % kernel.name) gen_sched = insert_barriers(kernel, gen_sched, - sychronization_kind="local", verify_only=False) + synchronization_kind="local", verify_only=False) 
logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index f9c78acba..5c41f0399 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -106,7 +106,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): [end_item]) elif isinstance(sched_item, Barrier): - if sched_item.sychronization_kind == "global": + if sched_item.synchronization_kind == "global": # Wrap the current chunk into a kernel call. schedule_required_splitting = True if current_chunk: diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 88def69e2..aac528087 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -189,9 +189,9 @@ class ASTBuilderBase(object): def add_vector_access(self, access_expr, index): raise NotImplementedError() - def emit_barrier(self, sychronization_kind, mem_kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ - :arg sychronization_kind: ``"local"`` or ``"global"`` + :arg synchronization_kind: ``"local"`` or ``"global"`` :arg mem_kind: ``"local"`` or ``"global"`` """ raise NotImplementedError() diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index b1ae8427b..027f27838 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -291,19 +291,19 @@ class CUDACASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr.a(self._VEC_AXES[index]) - def emit_barrier(self, sychronization_kind, mem_kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :arg memkind: unused :return: a :class:`loopy.codegen.GeneratedInstruction`. 
""" - if sychronization_kind == "local": + if synchronization_kind == "local": if comment: comment = " /* %s */" % comment from cgen import Statement return Statement("__syncthreads()%s" % comment) - elif sychronization_kind == "global": + elif synchronization_kind == "global": raise LoopyError("CUDA does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 2b2196236..45a59847b 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -288,15 +288,15 @@ class ISPCASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr[index] - def emit_barrier(self, sychronization_kind, mem_kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): from cgen import Comment, Statement assert comment - if sychronization_kind == "local": + if synchronization_kind == "local": return Comment("local barrier: %s" % comment) - elif sychronization_kind == "global": + elif synchronization_kind == "global": return Statement("sync; /* %s */" % comment) else: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index d7b570f20..50d6acc7a 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -450,12 +450,12 @@ class OpenCLCASTBuilder(CASTBuilder): # The 'int' avoids an 'L' suffix for long ints. return access_expr.attr("s%s" % hex(int(index))[2:]) - def emit_barrier(self, sychronization_kind, mem_kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :return: a :class:`loopy.codegen.GeneratedInstruction`. 
""" - if sychronization_kind == "local": + if synchronization_kind == "local": if comment: comment = " /* %s */" % comment @@ -463,7 +463,7 @@ class OpenCLCASTBuilder(CASTBuilder): from cgen import Statement return Statement("barrier(CLK_%s_MEM_FENCE)%s" % (mem_kind, comment)) - elif sychronization_kind == "global": + elif synchronization_kind == "global": raise LoopyError("OpenCL does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 9c2334ed3..ea1f31fe4 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -37,7 +37,7 @@ __doc__ = """ # {{{ add_barrier def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, - tags=None, sychronization_kind="global", mem_kind=None): + tags=None, synchronization_kind="global", mem_kind=None): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -49,17 +49,17 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, the barrier which is to be added :arg id: String on which the id of the barrier would be based on. :arg tags: The tag of the group to which the barrier must be added - :arg sychronization_kind: Kind of barrier to be added. May be "global" or "local" + :arg synchronization_kind: Kind of barrier to be added. May be "global" or "local" :arg kind: Type of memory to be synchronied. May be "global" or "local". Ignored - for "global" bariers. If not supplied, defaults to :arg:`sychronization_kind` + for "global" bariers. 
If not supplied, defaults to :arg:`synchronization_kind` """ if mem_kind is None: - mem_kind = sychronization_kind + mem_kind = synchronization_kind if id_based_on is None: id = knl.make_unique_instruction_id( - based_on=sychronization_kind[0]+"_barrier") + based_on=synchronization_kind[0]+"_barrier") else: id = knl.make_unique_instruction_id(based_on=id_based_on) @@ -71,7 +71,7 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, depends_on_is_final=True, id=id, tags=tags, - kind=sychronization_kind, + kind=synchronization_kind, mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 6cb5ed718..4755ca177 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -811,7 +811,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, barrier_insn = BarrierInstruction( id=barrier_insn_id, depends_on=frozenset([compute_insn_id]), - sychronization_kind="global", + synchronization_kind="global", mem_kind="global") compute_dep_id = barrier_insn_id diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 97eff0a0d..f830fc9eb 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -351,7 +351,7 @@ class TemporarySaver(object): self.subkernel_to_slice_indices[subkernel]) def is_global_barrier(item): - return isinstance(item, Barrier) and item.sychronization_kind == "global" + return isinstance(item, Barrier) and item.synchronization_kind == "global" try: pre_barrier = next(item for item in -- GitLab From 35c3d3a6e6c2e1a692afa6cc5dd9f1b7f6410dd7 Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 15:06:18 -0500 Subject: [PATCH 13/17] flake --- loopy/schedule/__init__.py | 3 ++- loopy/transform/add_barrier.py | 3 ++- loopy/transform/save.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 
4d7a9ee32..850f0a61f 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -446,7 +446,8 @@ def format_insn(kernel, insn_id): return "[%s] %s... %sbarrier%s%s" % ( format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.synchronization_kind[0], mem_kind, Style.RESET_ALL) + Fore.MAGENTA, insn.synchronization_kind[0], mem_kind, + Style.RESET_ALL) elif isinstance(insn, NoOpInstruction): return "[%s] %s... nop%s" % ( format_insn_id(kernel, insn_id), diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index ea1f31fe4..c2422bba9 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -49,7 +49,8 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, the barrier which is to be added :arg id: String on which the id of the barrier would be based on. :arg tags: The tag of the group to which the barrier must be added - :arg synchronization_kind: Kind of barrier to be added. May be "global" or "local" + :arg synchronization_kind: Kind of barrier to be added. May be "global" or + "local" :arg kind: Type of memory to be synchronied. May be "global" or "local". Ignored for "global" bariers. 
If not supplied, defaults to :arg:`synchronization_kind` """ diff --git a/loopy/transform/save.py b/loopy/transform/save.py index f830fc9eb..b53488b48 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -351,7 +351,8 @@ class TemporarySaver(object): self.subkernel_to_slice_indices[subkernel]) def is_global_barrier(item): - return isinstance(item, Barrier) and item.synchronization_kind == "global" + return isinstance(item, Barrier) and \ + item.synchronization_kind == "global" try: pre_barrier = next(item for item in -- GitLab From 6f6d21f6393851fc9dba801588909bc885da037b Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 14 Nov 2017 15:19:11 -0500 Subject: [PATCH 14/17] fix --- loopy/transform/add_barrier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index c2422bba9..cfbbd56e9 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -72,7 +72,7 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, depends_on_is_final=True, id=id, tags=tags, - kind=synchronization_kind, + synchronization_kind=synchronization_kind, mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) -- GitLab From b5d613b2f9fbb439fdbbc7e66e5d3d8004c1499b Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 15 Nov 2017 11:25:45 -0500 Subject: [PATCH 15/17] fix missing return value --- loopy/kernel/instruction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 9005d40de..dbd99e850 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1352,6 +1352,7 @@ class BarrierInstruction(_DataObliviousInstruction): from warnings import warn warn("BarrierInstruction.kind is deprecated, use synchronization_kind " "instead", DeprecationWarning, stacklevel=2) + return self.synchronization_kind # }}} -- GitLab From 89efcf5277ac947e0de6e30a3221aa70894b50e7 
Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 15 Nov 2017 11:27:22 -0500 Subject: [PATCH 16/17] bump data model --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 5e07e979f..e14216272 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v68-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v69-islpy%s" % _islpy_version -- GitLab From 43c3e4fba77d2aa440cae254b0ac03321d15f64a Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 15 Nov 2017 11:50:49 -0500 Subject: [PATCH 17/17] convert to check illegal options --- loopy/kernel/creation.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 2c15b4789..c96839357 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -172,8 +172,7 @@ from collections import namedtuple _NosyncParseResult = namedtuple("_NosyncParseResult", "expr, scope") -def parse_insn_options(opt_dict, options_str, assignee_names=None, - insn_kind=None): +def parse_insn_options(opt_dict, options_str, assignee_names=None): if options_str is None: return opt_dict @@ -354,9 +353,6 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None, del assignee_name elif opt_key == "mem_kind": - if insn_kind not in ['gbarrier', 'lbarrier']: - raise LoopyError("Cannot supply memory synchronization type to " - "non-barrier instruction %s" % insn_kind) opt_value = opt_value.lower().strip() if opt_value not in ['local', 'global']: raise LoopyError("Unknown memory synchronization type %s specified" @@ -432,6 +428,17 @@ SUBST_RE = re.compile( r"^\s*(?P<lhs>.+?)\s*:=\s*(?P<rhs>.+)\s*$") +def check_illegal_options(insn_options, insn_type): + illegal_options = [] + if insn_type not in ['gbarrier', 'lbarrier']: + illegal_options.append('mem_kind') + + bad_options = [x for x in
illegal_options if x in insn_options] + if bad_options: + raise LoopyError("Cannot supply option(s) '%s' to instruction type '%s'" % + (', '.join(bad_options), insn_type)) + + def parse_insn(groups, insn_options): """ :return: a tuple ``(insn, inames_to_dup)``, where insn is a @@ -505,6 +512,9 @@ def parse_insn(groups, insn_options): groups["options"], assignee_names=assignee_names) + # check for bad options + check_illegal_options(insn_options, 'assignment') + insn_id = insn_options.pop("insn_id", None) inames_to_dup = insn_options.pop("inames_to_dup", []) @@ -574,8 +584,7 @@ def parse_special_insn(groups, insn_options): insn_options = parse_insn_options( insn_options.copy(), groups["options"], - assignee_names=(), - insn_kind=groups['kind']) + assignee_names=()) del insn_options["atomicity"] @@ -591,6 +600,8 @@ def parse_special_insn(groups, insn_options): from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction special_insn_kind = groups["kind"] + # check for bad options + check_illegal_options(insn_options, special_insn_kind) if special_insn_kind == "gbarrier": cls = BarrierInstruction @@ -805,6 +816,8 @@ def parse_instructions(instructions, defines): parse_insn_options( insn_options_stack[-1], with_options_match.group("options"))) + # check for bad options + check_illegal_options(insn_options_stack[-1], 'with-block') continue for_match = FOR_RE.match(insn) -- GitLab