diff --git a/loopy/transform/reduction.py b/loopy/transform/reduction.py index 7b52d8fff3da7d55198555a17c75d368ad61d894..8a41753530d2b40723c60c703de860038203c057 100644 --- a/loopy/transform/reduction.py +++ b/loopy/transform/reduction.py @@ -382,12 +382,6 @@ def make_two_level_scan( nonlocal_init_tail_outer_iname = var_name_gen( "{sweep}__l{level}_nltail_outer".format(**format_kwargs)) - # FIXME: This iname is not really needed. We should see about getting - # rid of it. That would also make the write race warning business below - # unnecessary. - nonlocal_init_tail_inner_iname = var_name_gen( - "{sweep}__l{level}_nltail_inner".format(**format_kwargs)) - nonlocal_iname = var_name_gen( "{sweep}__l{level}_nonloc".format(**format_kwargs)) @@ -633,8 +627,6 @@ def make_two_level_scan( nonlocal_storage_len = pw_aff_to_expr(1 + nonlocal_storage_len_pw_aff) - nonlocal_tail_inner_subd = _make_slab_set(nonlocal_init_tail_inner_iname, 1) - kernel = _add_subdomain_to_kernel(kernel, nonlocal_tail_inner_subd) nonlocal_tail_outer_subd = _make_slab_set( nonlocal_init_tail_outer_iname, nonlocal_storage_len_pw_aff) kernel = _add_subdomain_to_kernel(kernel, nonlocal_tail_outer_subd) @@ -650,7 +642,7 @@ def make_two_level_scan( #nonlocal_init_head_outer_iname: outer_local_tag, #nonlocal_init_head_inner_iname: inner_local_tag, nonlocal_init_tail_outer_iname: outer_local_tag, - nonlocal_init_tail_inner_iname: inner_local_tag}) + }) for nls_name in [nonlocal_storage_name, nonlocal_scan_storage_name]: if nls_name not in kernel.temporary_variables: @@ -678,11 +670,9 @@ def make_two_level_scan( expression=0, within_inames=( - within_inames | frozenset([nonlocal_init_tail_outer_iname, - nonlocal_init_tail_inner_iname])), + within_inames | frozenset([nonlocal_init_tail_outer_iname])), no_sync_with=frozenset([(nonlocal_init_tail_insn_id, "any")]), - predicates=(var(nonlocal_init_tail_inner_iname).eq(0), - var(nonlocal_init_tail_outer_iname).eq(0)), + predicates=(var(nonlocal_init_tail_outer_iname).eq(0),), depends_on=frozenset([local_scan_dep_id])) nonlocal_init_tail = make_assignment( @@ -693,23 +683,19 @@ def make_two_level_scan( expression=var(local_storage_name)[ pick_out_relevant_axes( (var(nonlocal_init_tail_outer_iname), - var(nonlocal_init_tail_inner_iname) - + local_storage_local_axis_len - 1), + local_storage_local_axis_len - 1), strip_scalar=True)], no_sync_with=frozenset([(nonlocal_init_head_insn_id, "any")]), within_inames=( - within_inames | frozenset([nonlocal_init_tail_outer_iname, - nonlocal_init_tail_inner_iname])), + within_inames | frozenset([nonlocal_init_tail_outer_iname])), depends_on=frozenset([local_scan_dep_id])) kernel = _update_instructions( kernel, (nonlocal_init_head, nonlocal_init_tail), copy=False) - # The write race warnings are spurious - the inner iname is length - # 1, so there's really no write race at all here. + # The write race warnings are spurious - a predicate prevents the write race. kernel = kernel.copy( silenced_warnings=kernel.silenced_warnings - + ["write_race(%s)" % nonlocal_init_tail_insn_id] + ["write_race(%s)" % nonlocal_init_head_insn_id]) # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0af541274cc0362c9f71cec9c2887ffc..1e32913d29f9d420770c00cb73bbc44bdd2ed5b2 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -383,53 +383,49 @@ class TemporarySaver(object): self.kernel.reader_map()[temporary.name] | self.kernel.writer_map()[temporary.name]) - group_tags = None - local_tags = None - - def _sortedtags(tags): - return sorted(tags, key=lambda tag: tag.axis) + group_tags = {} + local_tags = {} for insn_id in accessor_insn_ids: insn = self.kernel.id_to_insn[insn_id] - my_group_tags = [] - my_local_tags = [] - for iname in insn.within_inames: tag = self.kernel.iname_to_tag.get(iname) - if tag is None: - continue - from loopy.kernel.data import ( - GroupIndexTag, LocalIndexTag, ConcurrentTag) + GroupIndexTag, LocalIndexTag, ConcurrentTag, UnrollTag, + ForceSequentialTag, InOrderSequentialSequentialTag) + if tag is None: + continue if isinstance(tag, GroupIndexTag): - my_group_tags.append(tag) + group_tags[tag.key] = tag elif isinstance(tag, LocalIndexTag): - my_local_tags.append(tag) + local_tags[tag.key] = tag elif isinstance(tag, ConcurrentTag): + # FIXME: ILP should really be supported, analogously to the + # group tags + raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " "auto save/reload of temporaries" % (iname, tag)) - if group_tags is None: - group_tags = _sortedtags(my_group_tags) - local_tags = _sortedtags(my_local_tags) - group_tags_originating_insn_id = insn_id - - if ( - group_tags != _sortedtags(my_group_tags) - or local_tags != _sortedtags(my_local_tags)): - raise LoopyError( - "inconsistent parallel tags across instructions that access " - "'%s' (specifically, instruction '%s' has tags '%s' but " - "instruction '%s' has tags '%s')" - % (temporary.name, - group_tags_originating_insn_id, group_tags + local_tags, - insn_id, my_group_tags + my_local_tags)) + elif isinstance(tag, + (ForceSequentialTag, InOrderSequentialSequentialTag, + UnrollTag)): + continue + + else: + raise NotImplementedError( + "unexpected iname tag in save/load: %s" % tag) + + def _sortedtags(tags): + return sorted(tags, key=lambda tag: tag.axis) + + group_tags = _sortedtags(group_tags.values()) + local_tags = _sortedtags(local_tags.values()) if group_tags is None: assert local_tags is None diff --git a/test/test_loopy.py b/test/test_loopy.py index 704fd391f33ab9f3a24b3cc2b534a5b61bd3e90b..dc224520d020d0f368c99222a12922b53cf80a75 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1429,28 +1429,6 @@ def test_save_with_base_storage(ctx_factory, debug=False): save_and_reload_temporaries_test(queue, knl, np.arange(10), debug) -def test_save_ambiguous_storage_requirements(): - knl = lp.make_kernel( - "{[i,j]: 0 <= i < 10 and 0 <= j < 10}", - """ - <>a[j] = j - ... gbarrier - out[i,j] = a[j] - """, - seq_dependencies=True) - - knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) - knl = lp.duplicate_inames(knl, "j", within="writes:out", tags={"j": "l.0"}) - knl = lp.set_temporary_scope(knl, "a", "local") - - knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) - - from loopy.diagnostic import LoopyError - with pytest.raises(LoopyError): - lp.save_and_reload_temporaries(knl) - - def test_save_across_inames_with_same_tag(ctx_factory, debug=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) diff --git a/test/test_scan.py b/test/test_scan.py index 82add0e3ff3f9ccbc744f8f4e7374b44a5e8058c..b5d029f7e2ee00d956a8eb931edaad9bdf7c8993 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -511,8 +511,7 @@ def _get_three_level_scan_kernel(g_size, p_size): inner_local_tag="for", outer_local_tag="l.0") - knl = lp.tag_inames(knl, dict(i__l0="l.0", - i__l0_nltail_inner="l.0")) + knl = lp.tag_inames(knl, dict(i__l0="l.0")) knl = lp.realize_reduction(knl, force_scan=True)