diff --git a/loopy/__init__.py b/loopy/__init__.py index 329313b8d1b2d7a061472e9abcbd738299ea6087..a5d06881e29202a75ae1abc1024a51cc6b0d3608 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -75,7 +75,8 @@ from loopy.transform.instruction import ( set_instruction_priority, add_dependency, remove_instructions, replace_instruction_ids, - tag_instructions) + tag_instructions, + add_nosync) from loopy.transform.data import ( add_prefetch, change_arg_to_image, diff --git a/loopy/check.py b/loopy/check.py index 6a1e3dc33a33b826ad54c42a549b35ad275d9fe5..68ca4a2b3082a1427393f6c6243a8a0e9cf4fd88 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -505,22 +505,22 @@ def check_that_atomic_ops_are_used_exactly_on_atomic_arrays(kernel): # {{{ check that temporaries are defined in subkernels where used def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): - from loopy.schedule.tools import InstructionQuery from loopy.kernel.data import temp_var_scope - insn_query = InstructionQuery(kernel) - - for subkernel in insn_query.subkernels(): + for subkernel in kernel.subkernels: defined_base_storage = set() - for temporary in insn_query.temporaries_written_in_subkernel(subkernel): + from loopy.schedule.tools import ( + temporaries_written_in_subkernel, temporaries_read_in_subkernel) + + for temporary in temporaries_written_in_subkernel(kernel, subkernel): tval = kernel.temporary_variables[temporary] if tval.base_storage is not None: defined_base_storage.add(tval.base_storage) for temporary in ( - insn_query.temporaries_read_in_subkernel(subkernel) - - insn_query.temporaries_written_in_subkernel(subkernel)): + temporaries_read_in_subkernel(kernel, subkernel) - + temporaries_written_in_subkernel(kernel, subkernel)): tval = kernel.temporary_variables[temporary] if tval.initializer is not None: @@ -530,16 +530,17 @@ def check_that_temporaries_are_defined_in_subkernels_where_used(kernel): if tval.base_storage is not None: if tval.base_storage not in defined_base_storage: from loopy.diagnostic import MissingDefinitionError - raise MissingDefinitionError("temporary variable '%s' gets used " - "in subkernel '%s' and neither it nor its aliases have a " - "definition" % (temporary, subkernel)) + raise MissingDefinitionError("temporary variable '%s' gets " + "used in subkernel '%s' and neither it nor its " + "aliases have a definition" % (temporary, subkernel)) continue if tval.scope in (temp_var_scope.PRIVATE, temp_var_scope.LOCAL): from loopy.diagnostic import MissingDefinitionError - raise MissingDefinitionError("temporary variable '%s' gets used in " - "subkernel '%s' without a definition (maybe you forgot to call " - "loopy.save_and_reload_temporaries?)" % (temporary, subkernel)) + raise MissingDefinitionError("temporary variable '%s' gets used " + "in subkernel '%s' without a definition (maybe you forgot " + "to call loopy.save_and_reload_temporaries?)" + % (temporary, subkernel)) # }}} diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 3145970c717905a4adfb07093a978fb4ce86a1cb..dea9c93b92e82675a836a0fc338f723aa603404f 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -786,6 +786,8 @@ class LoopKernel(ImmutableRecordWithoutPickling): for var_name in insn.read_dependency_names() & admissible_vars: result.setdefault(var_name, set()).add(insn.id) + return result + @memoize_method def writer_map(self): """ @@ -914,6 +916,40 @@ class LoopKernel(ImmutableRecordWithoutPickling): for dep in insn.depends_on), key=get_barrier_ordinal) + @property + @memoize_method + def subkernels(self): + return tuple(self.subkernel_to_insn_ids.keys()) + + @property + @memoize_method + def subkernel_to_insn_ids(self): + if self.state != kernel_state.SCHEDULED: + raise LoopyError("Kernel must be scheduled") + + from loopy.schedule import ( + sched_item_to_insn_id, CallKernel, ReturnFromKernel) + + from collections import OrderedDict + result = OrderedDict() + + subkernel = None + + for sched_item in self.schedule: + if isinstance(sched_item, CallKernel): + subkernel = sched_item.kernel_name + result[subkernel] = set() + + if isinstance(sched_item, ReturnFromKernel): + subkernel = None + + if subkernel is not None: + for insn_id in sched_item_to_insn_id(sched_item): + result[subkernel].add(insn_id) + + return OrderedDict( + (subkernel, frozenset(ids)) for subkernel, ids in result.items()) + # }}} # {{{ argument wrangling diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 692e3902827cd03d47f8ff130b5d596c6dad91ba..e058fe30f1f6a9fc2746b4c542e88041f76818d3 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -23,10 +23,6 @@ THE SOFTWARE. """ from loopy.kernel.data import temp_var_scope -from loopy.schedule import (BeginBlockItem, CallKernel, EndBlockItem, - RunInstruction, Barrier) - -from pytools import memoize_method # {{{ block boundary finder @@ -37,6 +33,7 @@ def get_block_boundaries(schedule): :class:`loopy.schedule.BlockBeginItem`s to :class:`loopy.schedule.BlockEndItem`s and vice versa. """ + from loopy.schedule import (BeginBlockItem, EndBlockItem) block_bounds = {} active_blocks = [] for idx, sched_item in enumerate(schedule): @@ -51,98 +48,20 @@ def get_block_boundaries(schedule): # }}} -# {{{ instruction query utility - -class InstructionQuery(object): - - def __init__(self, kernel): - self.kernel = kernel - block_bounds = get_block_boundaries(kernel.schedule) - subkernel_slices = {} - from six import iteritems - for start, end in iteritems(block_bounds): - sched_item = kernel.schedule[start] - if isinstance(sched_item, CallKernel): - subkernel_slices[sched_item.kernel_name] = slice(start, end + 1) - self.subkernel_slices = subkernel_slices - - @memoize_method - def subkernels(self): - return frozenset(self.subkernel_slices.keys()) - - @memoize_method - def insns_reading_or_writing(self, var): - return frozenset(insn.id for insn in self.kernel.instructions - if var in insn.read_dependency_names() - or var in insn.assignee_var_names()) - - @memoize_method - def insns_in_subkernel(self, subkernel): - return frozenset(sched_item.insn_id for sched_item - in self.kernel.schedule[self.subkernel_slices[subkernel]] - if isinstance(sched_item, RunInstruction)) - - @memoize_method - def temporaries_read_in_subkernel(self, subkernel): - return frozenset( - var - for insn in self.insns_in_subkernel(subkernel) - for var in self.kernel.id_to_insn[insn].read_dependency_names() - if var in self.kernel.temporary_variables) - - @memoize_method - def temporaries_written_in_subkernel(self, subkernel): - return frozenset( - var - for insn in self.insns_in_subkernel(subkernel) - for var in self.kernel.id_to_insn[insn].assignee_var_names() - if var in self.kernel.temporary_variables) - - @memoize_method - def temporaries_read_or_written_in_subkernel(self, subkernel): - return ( - self.temporaries_read_in_subkernel(subkernel) | - self.temporaries_written_in_subkernel(subkernel)) - - @memoize_method - def inames_in_subkernel(self, subkernel): - subkernel_start = self.subkernel_slices[subkernel].start - return frozenset(self.kernel.schedule[subkernel_start].extra_inames) - - @memoize_method - def pre_and_post_barriers(self, subkernel): - subkernel_start = self.subkernel_slices[subkernel].start - subkernel_end = self.subkernel_slices[subkernel].stop - - def is_global_barrier(item): - return isinstance(item, Barrier) and item.kind == "global" - - try: - pre_barrier = next(item for item in - self.kernel.schedule[subkernel_start::-1] - if is_global_barrier(item)).originating_insn_id - except StopIteration: - pre_barrier = None - - try: - post_barrier = next(item for item in - self.kernel.schedule[subkernel_end:] - if is_global_barrier(item)).originating_insn_id - except StopIteration: - post_barrier = None - - return (pre_barrier, post_barrier) - - @memoize_method - def hw_inames(self, insn_id): - """ - Return the inames that insn runs in and that are tagged as hardware - parallel. - """ - from loopy.kernel.data import HardwareParallelTag - return set(iname for iname in self.kernel.insn_inames(insn_id) - if isinstance(self.kernel.iname_to_tag.get(iname), - HardwareParallelTag)) +# {{{ subkernel tools + +def temporaries_read_in_subkernel(kernel, subkernel): + return frozenset(tv + for insn_id in kernel.subkernel_to_insn_ids[subkernel] + for tv in kernel.id_to_insn[insn_id].read_dependency_names() + if tv in kernel.temporary_variables) + + +def temporaries_written_in_subkernel(kernel, subkernel): + return frozenset(tv + for insn_id in kernel.subkernel_to_insn_ids[subkernel] + for tv in kernel.id_to_insn[insn_id].write_dependency_names() + if tv in kernel.temporary_variables) # }}} @@ -155,23 +74,27 @@ def add_extra_args_to_schedule(kernel): instructions in the schedule with global temporaries. """ new_schedule = [] - - insn_query = InstructionQuery(kernel) + from loopy.schedule import CallKernel for sched_item in kernel.schedule: if isinstance(sched_item, CallKernel): - subrange_temporaries = (insn_query - .temporaries_read_or_written_in_subkernel(sched_item.kernel_name)) + subkernel = sched_item.kernel_name + + used_temporaries = ( + temporaries_read_in_subkernel(kernel, subkernel) + | temporaries_written_in_subkernel(kernel, subkernel)) + more_args = set(tv - for tv in subrange_temporaries - if - kernel.temporary_variables[tv].scope == temp_var_scope.GLOBAL - and - kernel.temporary_variables[tv].initializer is None - and - tv not in sched_item.extra_args) + for tv in used_temporaries + if + kernel.temporary_variables[tv].scope == temp_var_scope.GLOBAL + and + kernel.temporary_variables[tv].initializer is None + and + tv not in sched_item.extra_args) + new_schedule.append(sched_item.copy( - extra_args=sched_item.extra_args + sorted(more_args))) + extra_args=sched_item.extra_args + sorted(more_args))) else: new_schedule.append(sched_item) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e3f3eb4ad49828c0ecad702166cd519bf3972cbc..410274f907b250bb81583281ebe503e8c8c80373 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -219,11 +219,11 @@ def tag_instructions(kernel, new_tag, within=None): # {{{ add nosync -def add_nosync_to_instructions( - kernel, scope, source, sink, bidirectional=False): +def add_nosync(kernel, scope, source, sink, bidirectional=False, force=False): """Add a *no_sync_with* directive between *source* and *sink*. - *no_sync_with* is only added if a dependency is present or if the - instruction pair is in a conflicting group. + *no_sync_with* is only added if a (syntactic) dependency edge + is present or if the instruction pair is in a conflicting group + (this does not check for memory dependencies). :arg kernel: :arg source: Either a single instruction id, or any instruction id @@ -234,6 +234,9 @@ def add_nosync_to_instructions( :arg bidirectional: A :class:`bool`. If *True*, add a *no_sync_with* to both the source and sink instructions, otherwise the directive is only added to the sink instructions. + :arg force: A :class:`bool`. If *True*, will add a *no_sync_with* + even without the presence of a syntactic dependency edge/ + conflicting instruction group. :return: The updated kernel """ @@ -259,12 +262,12 @@ def add_nosync_to_instructions( bool(insn2.groups & insn1.conflicts_with_groups)) from collections import defaultdict - nosync_to_add = defaultdict(lambda: set()) + nosync_to_add = defaultdict(set) for sink in sinks: for source in sources: - needs_nosync = ( + needs_nosync = force or ( source in kernel.recursive_insn_dep_map()[sink] or insns_in_conflicting_groups(source, sink)) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index efe0e83f40e10d20d23514cd2aef024333a2db17..1c431fa10ab61109094e666423f43ae5906c5a65 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -32,7 +32,7 @@ from loopy.schedule import ( EnterLoop, LeaveLoop, RunInstruction, CallKernel, ReturnFromKernel, Barrier) -from loopy.schedule.tools import (get_block_boundaries, InstructionQuery) +from loopy.schedule.tools import get_block_boundaries import logging @@ -232,7 +232,6 @@ class TemporarySaver(object): def __init__(self, kernel): self.kernel = kernel - self.insn_query = InstructionQuery(kernel) self.var_name_gen = kernel.get_var_name_generator() self.insn_name_gen = kernel.get_instruction_id_generator() @@ -243,12 +242,14 @@ class TemporarySaver(object): self.updated_iname_to_tag = {} self.updated_temporary_variables = {} - # temporary name -> save or reload insns - self.saves_or_reloads_added = {} - + # temporary name -> save or reload insn ids from collections import defaultdict - self.subkernel_to_saves = defaultdict(lambda: set()) - self.subkernel_to_reloads = defaultdict(lambda: set()) + self.temporary_to_save_ids = defaultdict(set) + self.temporary_to_reload_ids = defaultdict(set) + self.subkernel_to_newly_added_insn_ids = defaultdict(set) + + # Maps names of base_storage to the name of the temporary + # representative chosen for saves/reloads self.base_storage_to_representative = {} from loopy.kernel.data import ValueArg @@ -262,6 +263,64 @@ class TemporarySaver(object): arg.name for arg in kernel.args if isinstance(arg, ValueArg))))) + @property + @memoize_method + def subkernel_to_slice_indices(self): + result = {} + + for sched_item_idx, sched_item in enumerate(self.kernel.schedule): + if isinstance(sched_item, CallKernel): + start_idx = sched_item_idx + elif isinstance(sched_item, ReturnFromKernel): + result[sched_item.kernel_name] = (start_idx, 1 + sched_item_idx) + + return result + + @property + @memoize_method + def subkernel_to_surrounding_inames(self): + current_outer_inames = set() + within_subkernel = False + result = {} + + for sched_item_idx, sched_item in enumerate(self.kernel.schedule): + if isinstance(sched_item, CallKernel): + within_subkernel = True + result[sched_item.kernel_name] = frozenset(current_outer_inames) + elif isinstance(sched_item, ReturnFromKernel): + within_subkernel = False + elif isinstance(sched_item, EnterLoop): + if not within_subkernel: + current_outer_inames.add(sched_item.iname) + elif isinstance(sched_item, LeaveLoop): + current_outer_inames.discard(sched_item.iname) + + return result + + @memoize_method + def get_defining_global_barrier_pair(self, subkernel): + subkernel_start, subkernel_end = ( + self.subkernel_to_slice_indices[subkernel]) + + def is_global_barrier(item): + return isinstance(item, Barrier) and item.kind == "global" + + try: + pre_barrier = next(item for item in + self.kernel.schedule[subkernel_start::-1] + if is_global_barrier(item)).originating_insn_id + except StopIteration: + pre_barrier = None + + try: + post_barrier = next(item for item in + self.kernel.schedule[subkernel_end:] + if is_global_barrier(item)).originating_insn_id + except StopIteration: + post_barrier = None + + return (pre_barrier, post_barrier) + def get_hw_axis_sizes_and_tags_for_save_slot(self, temporary): """ This is used for determining the amount of global storage needed for saving @@ -272,9 +331,9 @@ class TemporarySaver(object): In the case of local temporaries, inames that are tagged hw-local do not contribute to the global storage shape. """ - - accessor_insn_ids = ( - self.insn_query.insns_reading_or_writing(temporary.name)) + accessor_insn_ids = frozenset( + self.kernel.reader_map()[temporary.name] + | self.kernel.writer_map()[temporary.name]) group_tags = None local_tags = None @@ -355,8 +414,9 @@ class TemporarySaver(object): return None if temporary.base_storage in self.base_storage_to_representative: - # FIXME: Pick the representative with the largest size... - return self.base_storage_to_representative[temporary.base_storage] + # XXX: Todo: Warn about multiple base_storage + #repr = self.base_storage_to_representative[temporary.base_storage] + pass hw_dims, hw_tags = self.get_hw_axis_sizes_and_tags_for_save_slot(temporary) non_hw_dims = temporary.shape @@ -388,19 +448,9 @@ class TemporarySaver(object): if promoted_temporary is None: return - if mode == "save": - if promoted_temporary.name in self.subkernel_to_saves[subkernel]: - return - self.subkernel_to_saves[subkernel].add(promoted_temporary.name) - - elif mode == "reload": - if promoted_temporary.name in self.subkernel_to_reloads[subkernel]: - return - self.subkernel_to_reloads[subkernel].add(promoted_temporary.name) - - new_subdomain, hw_inames, dim_inames, iname_to_tag = \ + new_subdomain, hw_inames, dim_inames, iname_to_tag = ( self.augment_domain_for_save_or_reload( - self.new_subdomain, promoted_temporary, mode, subkernel) + self.new_subdomain, promoted_temporary, mode, subkernel)) self.new_subdomain = new_subdomain @@ -417,8 +467,8 @@ class TemporarySaver(object): tuple(map(Variable, subscript))) orig_temporary = ( - self.kernel.temporary_variables[ - promoted_temporary.orig_temporary_name]) + self.kernel.temporary_variables[ + promoted_temporary.orig_temporary_name]) dim_inames_trunc = dim_inames[:len(orig_temporary.shape)] args = ( @@ -430,9 +480,10 @@ class TemporarySaver(object): if mode == "save": args = reversed(args) - accessing_insns_in_subkernel = ( - self.insn_query.insns_reading_or_writing(temporary) & - self.insn_query.insns_in_subkernel(subkernel)) + accessing_insns_in_subkernel = (frozenset( + self.kernel.reader_map()[temporary] + | self.kernel.writer_map()[temporary]) + & self.kernel.subkernel_to_insn_ids[subkernel]) if mode == "save": depends_on = accessing_insns_in_subkernel @@ -441,7 +492,7 @@ class TemporarySaver(object): depends_on = frozenset() update_deps = accessing_insns_in_subkernel - pre_barrier, post_barrier = self.insn_query.pre_and_post_barriers(subkernel) + pre_barrier, post_barrier = self.get_defining_global_barrier_pair(subkernel) if pre_barrier is not None: depends_on |= set([pre_barrier]) @@ -455,16 +506,19 @@ class TemporarySaver(object): *args, id=save_or_load_insn_id, within_inames=( - self.insn_query.inames_in_subkernel(subkernel) | - frozenset(hw_inames + dim_inames)), + self.subkernel_to_surrounding_inames[subkernel] + | frozenset(hw_inames + dim_inames)), within_inames_is_final=True, depends_on=depends_on, boostable=False, boostable_into=frozenset()) - if temporary not in self.saves_or_reloads_added: - self.saves_or_reloads_added[temporary] = set() - self.saves_or_reloads_added[temporary].add(save_or_load_insn_id) + if mode == "save": + self.temporary_to_save_ids[temporary].add(save_or_load_insn_id) + else: + self.temporary_to_reload_ids[temporary].add(save_or_load_insn_id) + + self.subkernel_to_newly_added_insn_ids[subkernel].add(save_or_load_insn_id) self.insns_to_insert.append(save_or_load_insn) @@ -473,8 +527,8 @@ class TemporarySaver(object): self.insns_to_update[insn_id] = insn.copy( depends_on=insn.depends_on | frozenset([save_or_load_insn_id])) - self.updated_temporary_variables[promoted_temporary.name] = \ - promoted_temporary.as_kernel_temporary(self.kernel) + self.updated_temporary_variables[promoted_temporary.name] = ( + promoted_temporary.as_kernel_temporary(self.kernel)) self.updated_iname_to_tag.update(iname_to_tag) @@ -484,15 +538,6 @@ class TemporarySaver(object): insns_to_insert = dict((insn.id, insn) for insn in self.insns_to_insert) - # Add global no_sync_with between any added reloads and saves - from six import iteritems - for temporary, added_insns in iteritems(self.saves_or_reloads_added): - for insn_id in added_insns: - insn = insns_to_insert[insn_id] - insns_to_insert[insn_id] = insn.copy( - no_sync_with=frozenset( - (added_insn, "global") for added_insn in added_insns)) - for orig_insn in self.kernel.instructions: if orig_insn.id in self.insns_to_update: new_instructions.append(self.insns_to_update[orig_insn.id]) @@ -516,6 +561,18 @@ class TemporarySaver(object): temporary_variables=self.updated_temporary_variables, overridden_get_grid_sizes_for_insn_ids=None) + # Add nosync directives to any saves or reloads that were added with a + # potential dependency chain. + for subkernel in self.kernel.subkernels: + relevant_insns = self.subkernel_to_newly_added_insn_ids[subkernel] + + from itertools import product + for temporary in self.temporary_to_reload_ids: + for source, sink in product( + relevant_insns & self.temporary_to_reload_ids[temporary], + relevant_insns & self.temporary_to_save_ids[temporary]): + kernel = lp.add_nosync(kernel, "global", source, sink) + from loopy.kernel.tools import assign_automatic_axes return assign_automatic_axes(kernel) @@ -530,7 +587,7 @@ class TemporarySaver(object): """ Add new axes to the domain corresponding to the dimensions of `promoted_temporary`. These axes will be used in the save/ - reload stage. + reload stage. These get prefixed onto the already existing axes. """ assert mode in ("save", "reload") import islpy as isl @@ -637,7 +694,8 @@ def save_and_reload_temporaries(knl): liveness = LivenessAnalysis(knl) saver = TemporarySaver(knl) - insn_query = InstructionQuery(knl) + from loopy.schedule.tools import ( + temporaries_read_in_subkernel, temporaries_written_in_subkernel) for sched_idx, sched_item in enumerate(knl.schedule): @@ -648,9 +706,10 @@ def save_and_reload_temporaries(knl): # Kernel entry: nothing live interesting_temporaries = set() else: + subkernel = sched_item.kernel_name interesting_temporaries = ( - insn_query.temporaries_read_or_written_in_subkernel( - sched_item.kernel_name)) + temporaries_read_in_subkernel(knl, subkernel) + | temporaries_written_in_subkernel(knl, subkernel)) for temporary in liveness[sched_idx].live_out & interesting_temporaries: logger.info("reloading {0} at entry of {1}" @@ -662,9 +721,9 @@ def save_and_reload_temporaries(knl): # Kernel exit: nothing live interesting_temporaries = set() else: + subkernel = sched_item.kernel_name interesting_temporaries = ( - insn_query.temporaries_written_in_subkernel( - sched_item.kernel_name)) + temporaries_written_in_subkernel(knl, subkernel)) for temporary in liveness[sched_idx].live_in & interesting_temporaries: logger.info("saving {0} before return of {1}" diff --git a/test/test_scan.py b/test/test_scan.py index 71fe559fa8ab0114ae6c0ee5f4098c03327292c2..5c84d6e4d62f18693b1352d47bf78b0a01bb6508 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -54,8 +54,8 @@ __all__ = [ # More things to test. -# - test that dummy inames are removed # - scan(a) + scan(b) +# - test for badly tagged inames # - global parallel scan # TO DO: @@ -410,8 +410,7 @@ def _get_two_level_scan_kernel(g_size): knl = lp.realize_reduction(knl, force_scan=True) - from loopy.transform.instruction import add_nosync_to_instructions - knl = add_nosync_to_instructions( + knl = lp.add_nosync( knl, scope="global", source="writes:acc_j__l0", @@ -470,8 +469,7 @@ def _get_three_level_scan_kernel(g_size, p_size): knl = lp.realize_reduction(knl, force_scan=True) - from loopy.transform.instruction import add_nosync_to_instructions - knl = add_nosync_to_instructions( + knl = lp.add_nosync( knl, scope="global", source="writes:acc_j__l0", @@ -493,6 +491,8 @@ def _get_three_level_scan_kernel(g_size, p_size): # }}} +# TODO: Test everything from the matrix +# (l.0, seq) x (l.0, seq) @pytest.mark.parametrize("input_len", (1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 32)) @pytest.mark.parametrize("g_size", (16,)) @@ -530,6 +530,16 @@ def test_three_level_scan(ctx_getter, g_size, p_size, input_len): assert (out == np.cumsum(a)).all() +def test_scan_extra_constraints_on_domain(): + knl = lp.make_kernel( + "{[i,j,k]: 0<=i 1: exec(sys.argv[1]) diff --git a/test/test_transform.py b/test/test_transform.py index 0d63ba2846420a10676be7329c3ab08cc77396d9..b5fcdf04c4781c5f370c911ceb7efcb4042f6b4e 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -402,9 +402,40 @@ def test_precompute_with_preexisting_inames_fail(): precompute_inames="ii,jj") -def test_add_nosync_to_instructions(): - # FIXME: Write test. - pass +def test_add_nosync(): + orig_knl = lp.make_kernel("{[i]: 0<=i<10}", + """ + <>tmp[i] = 10 {id=insn1} + <>tmp2[i] = 10 {id=insn2} + + <>tmp3[2*i] = 0 {id=insn3} + <>tmp4 = 1 + tmp3[2*i] {id=insn4} + + <>tmp5[i] = 0 {id=insn5,groups=g1} + tmp5[i] = 1 {id=insn6,conflicts=g1} + """) + + orig_knl = lp.set_temporary_scope(orig_knl, "tmp3", "local") + orig_knl = lp.set_temporary_scope(orig_knl, "tmp5", "local") + + # No dependency present - don't add nosync + knl = lp.add_nosync(orig_knl, "any", "writes:tmp", "writes:tmp2") + assert frozenset() == knl.id_to_insn["insn2"].no_sync_with + + # Dependency present + knl = lp.add_nosync(orig_knl, "local", "writes:tmp3", "reads:tmp3") + assert frozenset() == knl.id_to_insn["insn3"].no_sync_with + assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + + # Bidirectional + knl = lp.add_nosync( + orig_knl, "local", "writes:tmp3", "reads:tmp3", bidirectional=True) + assert frozenset([("insn4", "local")]) == knl.id_to_insn["insn3"].no_sync_with + assert frozenset([("insn3", "local")]) == knl.id_to_insn["insn4"].no_sync_with + + # Groups + knl = lp.add_nosync(orig_knl, "local", "insn5", "insn6") + assert frozenset([("insn5", "local")]) == knl.id_to_insn["insn6"].no_sync_with if __name__ == "__main__":