From 8d29bf32a4929f889728030c7539d4a5d174ef76 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 21 Oct 2017 16:19:51 +0200 Subject: [PATCH 1/4] Scan split: Get rid of unit-length nltail_inner iname --- loopy/transform/reduction.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/loopy/transform/reduction.py b/loopy/transform/reduction.py index 7b52d8fff..8a4175353 100644 --- a/loopy/transform/reduction.py +++ b/loopy/transform/reduction.py @@ -382,12 +382,6 @@ def make_two_level_scan( nonlocal_init_tail_outer_iname = var_name_gen( "{sweep}__l{level}_nltail_outer".format(**format_kwargs)) - # FIXME: This iname is not really needed. We should see about getting - # rid of it. That would also make the write race warning business below - # unnecessary. - nonlocal_init_tail_inner_iname = var_name_gen( - "{sweep}__l{level}_nltail_inner".format(**format_kwargs)) - nonlocal_iname = var_name_gen( "{sweep}__l{level}_nonloc".format(**format_kwargs)) @@ -633,8 +627,6 @@ def make_two_level_scan( nonlocal_storage_len = pw_aff_to_expr(1 + nonlocal_storage_len_pw_aff) - nonlocal_tail_inner_subd = _make_slab_set(nonlocal_init_tail_inner_iname, 1) - kernel = _add_subdomain_to_kernel(kernel, nonlocal_tail_inner_subd) nonlocal_tail_outer_subd = _make_slab_set( nonlocal_init_tail_outer_iname, nonlocal_storage_len_pw_aff) kernel = _add_subdomain_to_kernel(kernel, nonlocal_tail_outer_subd) @@ -650,7 +642,7 @@ def make_two_level_scan( #nonlocal_init_head_outer_iname: outer_local_tag, #nonlocal_init_head_inner_iname: inner_local_tag, nonlocal_init_tail_outer_iname: outer_local_tag, - nonlocal_init_tail_inner_iname: inner_local_tag}) + }) for nls_name in [nonlocal_storage_name, nonlocal_scan_storage_name]: if nls_name not in kernel.temporary_variables: @@ -678,11 +670,9 @@ def make_two_level_scan( expression=0, within_inames=( - within_inames | frozenset([nonlocal_init_tail_outer_iname, - nonlocal_init_tail_inner_iname])), + within_inames | frozenset([nonlocal_init_tail_outer_iname])), no_sync_with=frozenset([(nonlocal_init_tail_insn_id, "any")]), - predicates=(var(nonlocal_init_tail_inner_iname).eq(0), - var(nonlocal_init_tail_outer_iname).eq(0)), + predicates=(var(nonlocal_init_tail_outer_iname).eq(0),), depends_on=frozenset([local_scan_dep_id])) nonlocal_init_tail = make_assignment( @@ -693,23 +683,19 @@ def make_two_level_scan( expression=var(local_storage_name)[ pick_out_relevant_axes( (var(nonlocal_init_tail_outer_iname), - var(nonlocal_init_tail_inner_iname) - + local_storage_local_axis_len - 1), + local_storage_local_axis_len - 1), strip_scalar=True)], no_sync_with=frozenset([(nonlocal_init_head_insn_id, "any")]), within_inames=( - within_inames | frozenset([nonlocal_init_tail_outer_iname, - nonlocal_init_tail_inner_iname])), + within_inames | frozenset([nonlocal_init_tail_outer_iname])), depends_on=frozenset([local_scan_dep_id])) kernel = _update_instructions( kernel, (nonlocal_init_head, nonlocal_init_tail), copy=False) - # The write race warnings are spurious - the inner iname is length - # 1, so there's really no write race at all here. + # The write race warnings are spurious - a predicate prevents the write race. kernel = kernel.copy( silenced_warnings=kernel.silenced_warnings - + ["write_race(%s)" % nonlocal_init_tail_insn_id] + ["write_race(%s)" % nonlocal_init_head_insn_id]) # }}} -- GitLab From 61cff2cee369c3372369df95cc5f10e52faae882 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 21 Oct 2017 16:20:40 +0200 Subject: [PATCH 2/4] Save/load transform: Consider union of tags when determining relevant local axes for save/load temporary dimensioning --- loopy/transform/save.py | 54 +++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/loopy/transform/save.py b/loopy/transform/save.py index 2ba2338b0..1e32913d2 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -383,53 +383,49 @@ class TemporarySaver(object): self.kernel.reader_map()[temporary.name] | self.kernel.writer_map()[temporary.name]) - group_tags = None - local_tags = None - - def _sortedtags(tags): - return sorted(tags, key=lambda tag: tag.axis) + group_tags = {} + local_tags = {} for insn_id in accessor_insn_ids: insn = self.kernel.id_to_insn[insn_id] - my_group_tags = [] - my_local_tags = [] - for iname in insn.within_inames: tag = self.kernel.iname_to_tag.get(iname) - if tag is None: - continue - from loopy.kernel.data import ( - GroupIndexTag, LocalIndexTag, ConcurrentTag) + GroupIndexTag, LocalIndexTag, ConcurrentTag, UnrollTag, + ForceSequentialTag, InOrderSequentialSequentialTag) + if tag is None: + continue if isinstance(tag, GroupIndexTag): - my_group_tags.append(tag) + group_tags[tag.key] = tag elif isinstance(tag, LocalIndexTag): - my_local_tags.append(tag) + local_tags[tag.key] = tag elif isinstance(tag, ConcurrentTag): + # FIXME: ILP should really be supported, analogously to the + # group tags + raise LoopyError( "iname '%s' is tagged with '%s' - only " "group and local tags are supported for " "auto save/reload of temporaries" % (iname, tag)) - if group_tags is None: - group_tags = _sortedtags(my_group_tags) - local_tags = _sortedtags(my_local_tags) - group_tags_originating_insn_id = insn_id - - if ( - group_tags != _sortedtags(my_group_tags) - or local_tags != _sortedtags(my_local_tags)): - raise LoopyError( - "inconsistent parallel tags across instructions that access " - "'%s' (specifically, instruction '%s' has tags '%s' but " - "instruction '%s' has tags '%s')" - % (temporary.name, - group_tags_originating_insn_id, group_tags + local_tags, - insn_id, my_group_tags + my_local_tags)) + elif isinstance(tag, + (ForceSequentialTag, InOrderSequentialSequentialTag, + UnrollTag)): + continue + + else: + raise NotImplementedError( + "unexpected iname tag in save/load: %s" % tag) + + def _sortedtags(tags): + return sorted(tags, key=lambda tag: tag.axis) + + group_tags = _sortedtags(group_tags.values()) + local_tags = _sortedtags(local_tags.values()) if group_tags is None: assert local_tags is None -- GitLab From c6c0ae0e0834efd8daa10aea4a44d1bf12ab4702 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 21 Oct 2017 21:24:52 +0200 Subject: [PATCH 3/4] Delete reference to nltail_inner from tests --- test/test_scan.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_scan.py b/test/test_scan.py index 82add0e3f..b5d029f7e 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -511,8 +511,7 @@ def _get_three_level_scan_kernel(g_size, p_size): inner_local_tag="for", outer_local_tag="l.0") - knl = lp.tag_inames(knl, dict(i__l0="l.0", - i__l0_nltail_inner="l.0")) + knl = lp.tag_inames(knl, dict(i__l0="l.0")) knl = lp.realize_reduction(knl, force_scan=True) -- GitLab From 79cdbe93f3ea84030a4bd10903a3b25de7c0c1ca Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sat, 21 Oct 2017 21:42:43 +0200 Subject: [PATCH 4/4] Remove test_save_ambiguous_storage_requirements --- test/test_loopy.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/test/test_loopy.py b/test/test_loopy.py index 704fd391f..dc224520d 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1429,28 +1429,6 @@ def test_save_with_base_storage(ctx_factory, debug=False): save_and_reload_temporaries_test(queue, knl, np.arange(10), debug) -def test_save_ambiguous_storage_requirements(): - knl = lp.make_kernel( - "{[i,j]: 0 <= i < 10 and 0 <= j < 10}", - """ - <>a[j] = j - ... gbarrier - out[i,j] = a[j] - """, - seq_dependencies=True) - - knl = lp.tag_inames(knl, dict(i="g.0", j="l.0")) - knl = lp.duplicate_inames(knl, "j", within="writes:out", tags={"j": "l.0"}) - knl = lp.set_temporary_scope(knl, "a", "local") - - knl = lp.preprocess_kernel(knl) - knl = lp.get_one_scheduled_kernel(knl) - - from loopy.diagnostic import LoopyError - with pytest.raises(LoopyError): - lp.save_and_reload_temporaries(knl) - - def test_save_across_inames_with_same_tag(ctx_factory, debug=False): ctx = ctx_factory() queue = cl.CommandQueue(ctx) -- GitLab