diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 8b85387259228777f028fb70b1c0cf2efcc2d2ef..69f89548618e86b408a31af240bee84678c859c1 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1107,11 +1107,12 @@ work item: :mod:`loopy` supports two kinds of barriers: -* *Local barriers* ensure consistency of local memory accesses to items within +* *Local barriers* ensure consistency of memory accesses to items within *the same* work group. This synchronizes with all instructions in the work - group. + group. The type of memory (local or global) may be specified by the + :attr:`loopy.instruction.BarrierInstruction.mem_kind` attribute. -* *Global barriers* ensure consistency of global memory accesses +* *Global barriers* ensure consistency of memory accesses across *all* work groups, i.e. it synchronizes with every work item executing the kernel. Note that there is no exact equivalent for this kind of barrier in OpenCL. [#global-barrier-note]_ diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index 5240042337163f0aefcbc7fdb8f3151ac280053f..e3e209726879741c31d686f2a6530e1b7ec67b97 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -156,7 +156,8 @@ def generate_code_for_sched_index(codegen_state, sched_index): if codegen_state.is_generating_device_code: barrier_ast = codegen_state.ast_builder.emit_barrier( - sched_item.kind, sched_item.comment) + sched_item.synchronization_kind, sched_item.mem_kind, + sched_item.comment) if sched_item.originating_insn_id: return CodeGenerationResult.new( codegen_state, @@ -167,7 +168,7 @@ def generate_code_for_sched_index(codegen_state, sched_index): return barrier_ast else: # host code - if sched_item.kind in ["global", "local"]: + if sched_item.synchronization_kind in ["global", "local"]: # host code is assumed globally and locally synchronous return CodeGenerationResult( host_program=None, @@ -176,8 +177,9 @@ def generate_code_for_sched_index(codegen_state, sched_index):
implemented_data_info=codegen_state.implemented_data_info) else: - raise LoopyError("do not know how to emit code for barrier kind '%s'" - "in host code" % sched_item.kind) + raise LoopyError("do not know how to emit code for barrier " + "synchronization kind '%s'" "in host code" + % sched_item.synchronization_kind) # }}} diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index a50d797fea41ad60a44af1264457819e0680cea1..c96839357a6c0bf7a8877cba725598cfe1d344f0 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -352,6 +352,14 @@ def parse_insn_options(opt_dict, options_str, assignee_names=None): % v) del assignee_name + elif opt_key == "mem_kind": + opt_value = opt_value.lower().strip() + if opt_value not in ['local', 'global']: + raise LoopyError("Unknown memory synchronization type %s specified" + " expected, 'local' or 'global'." + % opt_value) + result["mem_kind"] = opt_value + else: raise ValueError( "unrecognized instruction option '%s' " @@ -420,6 +428,17 @@ SUBST_RE = re.compile( r"^\s*(?P.+?)\s*:=\s*(?P.+)\s*$") +def check_illegal_options(insn_options, insn_type): + illegal_options = [] + if insn_type not in ['gbarrier', 'lbarrier']: + illegal_options.append('mem_kind') + + bad_options = [x for x in illegal_options if x in insn_options] + if bad_options: + raise LoopyError("Cannot supply option(s) '%s' to instruction type '%s'" % + ', '.join(bad_options), insn_type) + + def parse_insn(groups, insn_options): """ :return: a tuple ``(insn, inames_to_dup)``, where insn is a @@ -493,6 +512,9 @@ def parse_insn(groups, insn_options): groups["options"], assignee_names=assignee_names) + # check for bad options + check_illegal_options(insn_options, 'assignment') + insn_id = insn_options.pop("insn_id", None) inames_to_dup = insn_options.pop("inames_to_dup", []) @@ -578,13 +600,15 @@ def parse_special_insn(groups, insn_options): from loopy.kernel.instruction import NoOpInstruction, BarrierInstruction special_insn_kind = groups["kind"] 
+ # check for bad options + check_illegal_options(insn_options, special_insn_kind) if special_insn_kind == "gbarrier": cls = BarrierInstruction - kwargs["kind"] = "global" + kwargs["synchronization_kind"] = "global" elif special_insn_kind == "lbarrier": cls = BarrierInstruction - kwargs["kind"] = "local" + kwargs["synchronization_kind"] = "local" elif special_insn_kind == "nop": cls = NoOpInstruction else: @@ -792,6 +816,8 @@ def parse_instructions(instructions, defines): parse_insn_options( insn_options_stack[-1], with_options_match.group("options"))) + # check for bad options + check_illegal_options(insn_options_stack[-1], 'with-block') continue for_match = FOR_RE.match(insn) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index e6b0937856c45fd130f93742eb000c12921d0e11..dbd99e85016b00b3df4827ad7999e7b57e58af24 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1280,19 +1280,29 @@ class NoOpInstruction(_DataObliviousInstruction): class BarrierInstruction(_DataObliviousInstruction): """An instruction that requires synchronization with all - concurrent work items of :attr:`kind`. + concurrent work items of :attr:`synchronization_kind`. - .. attribute:: kind + .. attribute:: synchronization_kind A string, ``"global"`` or ``"local"``. + .. attribute:: mem_kind + + A string, ``"global"`` or ``"local"``. Chooses which memory type to + synchronize, for targets that require this (e.g. OpenCL) + The textual syntax in a :mod:`loopy` kernel is:: ... gbarrier ... lbarrier + + Note that the memory type :attr:`mem_kind` can be specified for local barriers:: + + ...
lbarrier {mem_kind=global} """ - fields = _DataObliviousInstruction.fields | set(["kind"]) + fields = _DataObliviousInstruction.fields | set(["synchronization_kind", + "mem_kind"]) def __init__(self, id, depends_on=None, depends_on_is_final=None, groups=None, conflicts_with_groups=None, @@ -1300,7 +1310,8 @@ class BarrierInstruction(_DataObliviousInstruction): within_inames_is_final=None, within_inames=None, priority=None, boostable=None, boostable_into=None, - predicates=None, tags=None, kind="global"): + predicates=None, tags=None, synchronization_kind="global", + mem_kind="local"): if predicates: raise LoopyError("conditional barriers are not supported") @@ -1318,20 +1329,31 @@ class BarrierInstruction(_DataObliviousInstruction): boostable=boostable, boostable_into=boostable_into, predicates=predicates, - tags=tags, + tags=tags ) - self.kind = kind + self.synchronization_kind = synchronization_kind + self.mem_kind = mem_kind def __str__(self): - first_line = "%s: ... %sbarrier" % (self.id, self.kind[0]) + first_line = "%s: ... %sbarrier" % (self.id, self.synchronization_kind[0]) options = self.get_str_options() + if self.synchronization_kind == "local": + # add the memory kind + options += ['mem_kind={}'.format(self.mem_kind)] if options: first_line += " {%s}" % (": ".join(options)) return first_line + @property + def kind(self): + from warnings import warn + warn("BarrierInstruction.kind is deprecated, use synchronization_kind " + "instead", DeprecationWarning, stacklevel=2) + return self.synchronization_kind + # }}} diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 02df0f2b4fd27dcb0f8b847411aa3dea7f3f9169..a65e7fb4ceefd28a909dcb6cee24ea437f15a60e 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -1472,7 +1472,7 @@ def stringify_instruction_list(kernel): trailing = [l for l in insn.code.split("\n")] elif isinstance(insn, lp.BarrierInstruction): lhs = "" - rhs = "... %sbarrier" % insn.kind[0] + rhs = "... 
%sbarrier" % insn.synchronization_kind[0] trailing = [] elif isinstance(insn, lp.NoOpInstruction): @@ -1502,6 +1502,9 @@ def stringify_instruction_list(kernel): if insn.no_sync_with: options.append("no_sync_with=%s" % ":".join( "%s@%s" % entry for entry in sorted(insn.no_sync_with))) + if isinstance(insn, lp.BarrierInstruction) and \ + insn.synchronization_kind == 'local': + options.append('mem_kind=%s' % insn.mem_kind) if lhs: core = "%s = %s" % ( @@ -1554,7 +1557,8 @@ def get_global_barrier_order(kernel): def is_barrier(my_insn_id): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + return isinstance(insn, BarrierInstruction) and \ + insn.synchronization_kind == "global" while unvisited: stack = [unvisited.pop()] @@ -1647,7 +1651,8 @@ def find_most_recent_global_barrier(kernel, insn_id): def is_barrier(my_insn_id): insn = kernel.id_to_insn[my_insn_id] from loopy.kernel.instruction import BarrierInstruction - return isinstance(insn, BarrierInstruction) and insn.kind == "global" + return isinstance(insn, BarrierInstruction) and \ + insn.synchronization_kind == "global" global_barrier_to_ordinal = dict( (b, i) for i, b in enumerate(global_barrier_order)) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index abf4d799fbdb14f86fa29dde26e6654130fc66de..850f0a61fcdc2878d43895bc0e024032532aa680 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -84,14 +84,18 @@ class Barrier(ScheduleItem): A plain-text comment explaining why the barrier was inserted. - .. attribute:: kind + .. attribute:: synchronization_kind + + ``"local"`` or ``"global"`` + + .. attribute:: mem_kind ``"local"`` or ``"global"`` .. 
attribute:: originating_insn_id """ - hash_fields = ["comment", "kind"] + hash_fields = ["comment", "synchronization_kind", "mem_kind"] __slots__ = hash_fields + ["originating_insn_id"] # }}} @@ -436,9 +440,14 @@ def format_insn(kernel, insn_id): Fore.MAGENTA, str(insn.expression), Style.RESET_ALL, format_insn_id(kernel, insn_id)) elif isinstance(insn, BarrierInstruction): - return "[%s] %s... %sbarrier%s" % ( + mem_kind = '' + if insn.synchronization_kind == 'local': + mem_kind = '{mem_kind=%s}' % insn.mem_kind + + return "[%s] %s... %sbarrier%s%s" % ( format_insn_id(kernel, insn_id), - Fore.MAGENTA, insn.kind[0], Style.RESET_ALL) + Fore.MAGENTA, insn.synchronization_kind[0], mem_kind, + Style.RESET_ALL) elif isinstance(insn, NoOpInstruction): return "[%s] %s... nop%s" % ( format_insn_id(kernel, insn_id), @@ -479,7 +488,8 @@ def dump_schedule(kernel, schedule): insn_str = sched_item.insn_id lines.append(indent + insn_str) elif isinstance(sched_item, Barrier): - lines.append(indent + "... %sbarrier" % sched_item.kind[0]) + lines.append(indent + "... 
%sbarrier" % + sched_item.synchronization_kind[0]) else: assert False @@ -833,7 +843,8 @@ def generate_loop_schedules_internal( # {{{ check if scheduler state allows insn scheduling from loopy.kernel.instruction import BarrierInstruction - if isinstance(insn, BarrierInstruction) and insn.kind == "global": + if isinstance(insn, BarrierInstruction) and \ + insn.synchronization_kind == "global": if not sched_state.may_schedule_global_barriers: if debug_mode: print("can't schedule '%s' because global barriers are " @@ -1318,7 +1329,8 @@ def convert_barrier_instructions_to_barriers(kernel, schedule): insn = kernel.id_to_insn[sched_item.insn_id] if isinstance(insn, BarrierInstruction): result.append(Barrier( - kind=insn.kind, + synchronization_kind=insn.synchronization_kind, + mem_kind=insn.mem_kind, originating_insn_id=insn.id, comment="Barrier inserted due to %s" % insn.id)) continue @@ -1577,7 +1589,8 @@ def _insn_ids_reaching_end(schedule, kind, reverse): # end # barrier() # end - if barrier_kind_more_or_equally_global(sched_item.kind, kind): + if barrier_kind_more_or_equally_global( + sched_item.synchronization_kind, kind): insn_ids_alive_at_scope[-1].clear() else: insn_ids_alive_at_scope[-1] |= set( @@ -1607,15 +1620,17 @@ def append_barrier_or_raise_error(schedule, dep, verify_only): tgt=dep.target.id, src=dep.source.id)) schedule.append(Barrier( comment=comment, - kind=dep.var_kind, + synchronization_kind=dep.var_kind, + mem_kind=dep.var_kind, originating_insn_id=None)) -def insert_barriers(kernel, schedule, kind, verify_only, level=0): +def insert_barriers(kernel, schedule, synchronization_kind, verify_only, level=0): """ - :arg kind: "local" or "global". The :attr:`Barrier.kind` to be inserted. - Generally, this function will be called once for each kind of barrier - at the top level, where more global barriers should be inserted first. + :arg synchronization_kind: "local" or "global". + The :attr:`Barrier.synchronization_kind` to be inserted. 
Generally, this + function will be called once for each kind of barrier at the top level, where + more global barriers should be inserted first. :arg verify_only: do not insert barriers, only complain if they are missing. :arg level: the current level of loop nesting, 0 for outermost. @@ -1624,14 +1639,15 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): # {{{ insert barriers at outermost scheduling level def insert_barriers_at_outer_level(schedule, reverse=False): - dep_tracker = DependencyTracker(kernel, var_kind=kind, reverse=reverse) + dep_tracker = DependencyTracker(kernel, var_kind=synchronization_kind, + reverse=reverse) if reverse: # Populate the dependency tracker with sources from the tail end of # the schedule block. for insn_id in ( insn_ids_reaching_end_without_intervening_barrier( - schedule, kind)): + schedule, synchronization_kind)): dep_tracker.add_source(insn_id) result = [] @@ -1645,11 +1661,11 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): loop_head = ( insn_ids_reachable_from_start_without_intervening_barrier( - subloop, kind)) + subloop, synchronization_kind)) loop_tail = ( insn_ids_reaching_end_without_intervening_barrier( - subloop, kind)) + subloop, synchronization_kind)) # Checks if a barrier is needed before the loop. 
This handles # dependencies with targets that can be reached without an @@ -1688,7 +1704,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): elif isinstance(sched_item, Barrier): result.append(sched_item) - if barrier_kind_more_or_equally_global(sched_item.kind, kind): + if barrier_kind_more_or_equally_global( + sched_item.synchronization_kind, synchronization_kind): dep_tracker.discard_all_sources() i += 1 @@ -1724,7 +1741,8 @@ def insert_barriers(kernel, schedule, kind, verify_only, level=0): if isinstance(sched_item, EnterLoop): subloop, new_i = gather_schedule_block(schedule, i) new_subloop = insert_barriers( - kernel, subloop[1:-1], kind, verify_only, level + 1) + kernel, subloop[1:-1], synchronization_kind, verify_only, + level + 1) result.append(subloop[0]) result.extend(new_subloop) result.append(subloop[-1]) @@ -1890,11 +1908,11 @@ def generate_loop_schedules_inner(kernel, debug_args={}): if not kernel.options.disable_global_barriers: logger.debug("%s: barrier insertion: global" % kernel.name) gen_sched = insert_barriers(kernel, gen_sched, - kind="global", verify_only=True) + synchronization_kind="global", verify_only=True) logger.debug("%s: barrier insertion: local" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, kind="local", - verify_only=False) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="local", verify_only=False) logger.debug("%s: barrier insertion: done" % kernel.name) new_kernel = kernel.copy( diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index 1a0789c2f61e21e4a0371e2a73195c9771245527..5c41f03997e5193333f5be213f2f87d38147b6df 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -106,7 +106,7 @@ def map_schedule_onto_host_or_device_impl(kernel, device_prog_name_gen): [end_item]) elif isinstance(sched_item, Barrier): - if sched_item.kind == "global": + if sched_item.synchronization_kind == "global": # Wrap the current 
chunk into a kernel call. schedule_required_splitting = True if current_chunk: diff --git a/loopy/statistics.py b/loopy/statistics.py index 88d7ec328345fd4c97d75b449385316f99c2509d..72d0c6c7d7a634cd96379d17b7a91f6a638e0ab9 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -1417,7 +1417,8 @@ def get_synchronization_map(knl): iname_list.pop() elif isinstance(sched_item, Barrier): - result = result + ToCountMap({"barrier_%s" % sched_item.kind: + result = result + ToCountMap({"barrier_%s" % + sched_item.synchronization_kind: get_count_poly(iname_list)}) elif isinstance(sched_item, CallKernel): diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 5800a0236e8ae5f81a63942c31a74822bc2fab96..aac528087cf812a91553d416f166be898a1cd132 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -189,9 +189,10 @@ class ASTBuilderBase(object): def add_vector_access(self, access_expr, index): raise NotImplementedError() - def emit_barrier(self, kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ - :arg kind: ``"local"`` or ``"global"`` + :arg synchronization_kind: ``"local"`` or ``"global"`` + :arg mem_kind: ``"local"`` or ``"global"`` """ raise NotImplementedError() diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 2bdffb5aa69bdc0f72fe12a58faa6d0e78920e0f..027f27838bf68511905bd34cf75d0b361c749629 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -291,18 +291,19 @@ class CUDACASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr.a(self._VEC_AXES[index]) - def emit_barrier(self, kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` + :arg mem_kind: unused :return: a :class:`loopy.codegen.GeneratedInstruction`.
""" - if kind == "local": + if synchronization_kind == "local": if comment: comment = " /* %s */" % comment from cgen import Statement return Statement("__syncthreads()%s" % comment) - elif kind == "global": + elif synchronization_kind == "global": raise LoopyError("CUDA does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 35dade90494906b61aad9eb66e7271f2c5d1e180..45a59847ba9f175df5ca1be46aa78566b2aab03b 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -288,15 +288,15 @@ class ISPCASTBuilder(CASTBuilder): def add_vector_access(self, access_expr, index): return access_expr[index] - def emit_barrier(self, kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): from cgen import Comment, Statement assert comment - if kind == "local": + if synchronization_kind == "local": return Comment("local barrier: %s" % comment) - elif kind == "global": + elif synchronization_kind == "global": return Statement("sync; /* %s */" % comment) else: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index a5f7562c41c3ec8eca673904550e078d2a992241..50d6acc7af69151d30ad4940a303ca57f047815c 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -450,18 +450,20 @@ class OpenCLCASTBuilder(CASTBuilder): # The 'int' avoids an 'L' suffix for long ints. return access_expr.attr("s%s" % hex(int(index))[2:]) - def emit_barrier(self, kind, comment): + def emit_barrier(self, synchronization_kind, mem_kind, comment): """ :arg kind: ``"local"`` or ``"global"`` :return: a :class:`loopy.codegen.GeneratedInstruction`.
""" - if kind == "local": + if synchronization_kind == "local": if comment: comment = " /* %s */" % comment + mem_kind = mem_kind.upper() + from cgen import Statement - return Statement("barrier(CLK_LOCAL_MEM_FENCE)%s" % comment) - elif kind == "global": + return Statement("barrier(CLK_%s_MEM_FENCE)%s" % (mem_kind, comment)) + elif synchronization_kind == "global": raise LoopyError("OpenCL does not have global barriers") else: raise LoopyError("unknown barrier kind") diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 00c99edce1ee741572e378d3610e533c6621ecfb..cfbbd56e906c5e622debcd82bd5368aa3b1fb34c 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -37,7 +37,7 @@ __doc__ = """ # {{{ add_barrier def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, - tags=None, kind="global"): + tags=None, synchronization_kind="global", mem_kind=None): """Takes in a kernel that needs to be added a barrier and returns a kernel which has a barrier inserted into it. It takes input of 2 instructions and then adds a barrier in between those 2 instructions. The expressions can @@ -49,11 +49,18 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, the barrier which is to be added :arg id: String on which the id of the barrier would be based on. :arg tags: The tag of the group to which the barrier must be added - :arg kind: Kind of barrier to be added. May be "global" or "local". + :arg synchronization_kind: Kind of barrier to be added. May be "global" or + "local" + :arg mem_kind: Type of memory to be synchronized. May be "global" or "local". Ignored + for "global" barriers.
If not supplied, defaults to :arg:`synchronization_kind` """ + if mem_kind is None: + mem_kind = synchronization_kind + if id_based_on is None: - id = knl.make_unique_instruction_id(based_on=kind[0]+"_barrier") + id = knl.make_unique_instruction_id( + based_on=synchronization_kind[0]+"_barrier") else: id = knl.make_unique_instruction_id(based_on=id_based_on) @@ -65,7 +72,8 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, depends_on_is_final=True, id=id, tags=tags, - kind=kind) + synchronization_kind=synchronization_kind, + mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) new_knl = add_dependency(kernel=new_knl, diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 6077332c4fc4322ac7ffb02ade4a0e24c7066245..4755ca1774a15480a2c6b255380dd724e47f9042 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -811,7 +811,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, barrier_insn = BarrierInstruction( id=barrier_insn_id, depends_on=frozenset([compute_insn_id]), - kind="global") + synchronization_kind="global", + mem_kind="global") compute_dep_id = barrier_insn_id added_compute_insns.append(barrier_insn) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0af541274cc0362c9f71cec9c2887ffc..b53488b486c6750742b269f47cfd4f08b8f8fab9 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -351,7 +351,8 @@ class TemporarySaver(object): self.subkernel_to_slice_indices[subkernel]) def is_global_barrier(item): - return isinstance(item, Barrier) and item.kind == "global" + return isinstance(item, Barrier) and \ + item.synchronization_kind == "global" try: pre_barrier = next(item for item in diff --git a/loopy/version.py b/loopy/version.py index 5e07e979f2d44684be00290328244496176337b3..e142162729d5a374082fa853dcc763665f7dfe33 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except 
ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v68-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v69-islpy%s" % _islpy_version diff --git a/test/test_loopy.py b/test/test_loopy.py index d0398f216a7f85798bc5f125e353578e74765b9f..1aba532a19afd9de100916806db2f3efa14c3181 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1090,6 +1090,28 @@ def test_literal_local_barrier(ctx_factory): lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=5)) +def test_local_barrier_mem_kind(): + def __test_type(mtype, expected): + insn = '... lbarrier' + if mtype: + insn += '{mem_kind=%s}' % mtype + knl = lp.make_kernel( + "{ [i]: 0<=i