def parse_iname_tag(self, tag):
    """Convert *tag* into an iname tag object using the registered parsers.

    :arg tag: *None*, an instance of :class:`loopy.kernel.data.InameTag`,
        or a value understood by one of the parsers (typically a string
        such as ``"l.0"``).
    :returns: *None* if *tag* is *None*; *tag* itself if it already is an
        :class:`loopy.kernel.data.InameTag`; otherwise the first
        non-*None* result produced by a parser.
    :raises TypeError: if a parser returns something that is neither
        *None* nor an :class:`loopy.kernel.data.InameTag`.
    :raises ValueError: if no parser accepts *tag*.

    Parsers are tried in this order: the kernel's own
    :attr:`iname_tag_parsers`, the parsers supplied by the target, and
    finally the builtin parser from
    :func:`loopy.kernel.data.default_iname_tag_parser`.
    """
    if tag is None:
        return None

    # Imported locally, as in the original code.
    from loopy.kernel.data import InameTag, default_iname_tag_parser

    if isinstance(tag, InameTag):
        return tag

    # Copy into a fresh list so that the kernel's attribute is never
    # mutated and may be any iterable (list or tuple).
    parsers = list(self.iname_tag_parsers)
    # NOTE(review): guard added defensively; whether a kernel can reach
    # this point with target=None is not visible here -- confirm.
    if self.target is not None:
        parsers.extend(self.target.iname_tag_parsers())
    parsers.append(default_iname_tag_parser(self))

    for parser in parsers:
        parsed = parser(tag)
        if parsed is None:
            # This parser does not accept the tag; try the next one.
            continue

        if not isinstance(parsed, InameTag):
            raise TypeError("Iname tag parse results are expected to "
                    "be instances of loopy.kernel.data.InameTag")
        return parsed

    # Wrap in a 1-tuple so that a tuple-valued *tag* is formatted rather
    # than being unpacked as multiple format arguments.
    raise ValueError("cannot parse tag: %s" % (tag,))
def default_iname_tag_parser(kernel):
    """Build the parser for loopy's builtin iname tag strings.

    :arg kernel: the kernel used to resolve the context-dependent tags
        ``like.<iname>``, ``unused.g`` and ``unused.l``.
    :returns: a callable taking a tag and returning an iname tag instance,
        or *None* if the tag is not recognized.

    Recognized strings: ``ord``, ``unr``, ``vec``, ``ilp``, ``ilp.unr``,
    ``ilp.seq``, ``g.<int>``, ``l.<int>``, ``l.auto``, ``like.<iname>``,
    ``unused.g`` and ``unused.l``.
    """

    def _parser(tag):
        # Anything that is not a string cannot be one of the builtin tag
        # spellings. Decline it rather than failing on a missing
        # ``startswith`` attribute.
        if not isinstance(tag, str):
            return None

        if tag == "ord":
            return InOrderSequentialSequentialTag()
        elif tag == "unr":
            return UnrollTag()
        elif tag == "vec":
            return VectorizeTag()
        elif tag in ("ilp", "ilp.unr"):
            return UnrolledIlpTag()
        elif tag == "ilp.seq":
            return LoopedIlpTag()
        elif tag.startswith("g."):
            return GroupIndexTag(int(tag[2:]))
        elif tag.startswith("l."):
            axis = tag[2:]
            if axis == "auto":
                return AutoFitLocalIndexTag()
            return LocalIndexTag(int(axis))
        elif tag.startswith("like."):
            # Inames without any tag may be absent from the mapping, and
            # the mapped values are sets, so they cannot be indexed.
            tags = kernel.iname_to_tags.get(tag[5:], frozenset())
            if len(tags) > 1:
                raise LoopyError(
                        "cannot use like for multiple tags (for now)")
            # NOTE(review): for an untagged iname this yields None, which
            # callers of this parser may interpret as "tag not accepted"
            # rather than "no tag" -- confirm the intended behavior.
            return next(iter(tags), None)
        elif tag in ("unused.g", "unused.l"):
            from loopy.transform.iname import find_unused_axis_tag
            return find_unused_axis_tag(kernel, tag[len("unused."):])

        return None

    return _parser
class InameTagged(MatchExpressionBase):
    """Match expression that accepts a matchable if any of the inames it
    is nested in carries a given iname tag.

    :arg tag: the tag to look for. A string is parsed with
        ``kernel.parse_iname_tag`` each time the expression is evaluated;
        any other value is compared against the inames' tags as-is.
    """

    # NOTE(review): the ``inametag:`` lexer pattern only admits characters
    # from ``[\w?*]``, so dotted tags such as ``g.0`` or ``l.auto`` cannot
    # be written in a match string -- confirm whether that is intended.

    def __init__(self, tag):
        self.tag = tag

    def __call__(self, kernel, matchable):
        tag = self.tag
        if isinstance(tag, str):
            tag = kernel.parse_iname_tag(tag)

        # Untagged inames may be missing from the mapping; treat them as
        # carrying no tags rather than raising KeyError.
        return any(
                tag in kernel.iname_to_tags.get(iname, frozenset())
                for iname in matchable.within_inames)
def iname_tag_parsers(self):
    """Return the iname tag parsers contributed by this target.

    The base implementation contributes none and returns a new empty
    list on every call.
    """
    return list()
subst_use, sweep_inames=[], within=None, default_tag = "l.auto" - from loopy.kernel.data import parse_tag - default_tag = parse_tag(default_tag) + default_tag = kernel.parse_iname_tag(default_tag) # }}} diff --git a/test/test_apps.py b/test/test_apps.py index e7f4004fa0f2285920bdf9a0848c0d400e2c31b7..6cde042ea0885c663252874d301f2f087d893bb7 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -549,7 +549,7 @@ def test_fd_1d(ctx_factory): knl = lp.split_iname(knl, "i", 16) knl = lp.extract_subst(knl, "u_acc", "u[j]", parameters="j") - knl = lp.precompute(knl, "u_acc", "i_inner", default_tag="for") + knl = lp.precompute(knl, "u_acc", "i_inner", default_tag=None) knl = lp.assume(knl, "n mod 16 = 0") lp.auto_test_vs_ref( @@ -589,12 +589,12 @@ def test_poisson_fem(ctx_factory): knl = lp.prioritize_loops(knl, ["c", "j", "i", "k"]) def variant_1(knl): - knl = lp.precompute(knl, "dpsi", "i,k,ell", default_tag='for') + knl = lp.precompute(knl, "dpsi", "i,k,ell", default_tag=None) knl = lp.prioritize_loops(knl, "c,i,j") return knl def variant_2(knl): - knl = lp.precompute(knl, "dpsi", "i,ell", default_tag='for') + knl = lp.precompute(knl, "dpsi", "i,ell", default_tag=None) knl = lp.prioritize_loops(knl, "c,i,j") return knl diff --git a/test/test_loopy.py b/test/test_loopy.py index accf9c1dff5a1f660871dd63d6af3337aced6490..a23a483b6d5de0730b28c7f47bbcbdbc394e4d8a 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -861,7 +861,7 @@ def test_slab_decomposition_does_not_double_execute(ctx_factory): ref_knl = knl - for outer_tag in ["for", "g.0"]: + for outer_tag in [None, "g.0"]: knl = ref_knl knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr", outer_tag=outer_tag) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 6b578838d99cb5aa28296619fdec6e8a2359ba0b..63f0b128597e5b8fda831a09d41dfd411eadf296 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -185,7 +185,7 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, 
opt_level): # noqa Q_dim_field_outer="unr")) hsv = lp.buffer_array(hsv, "rhsQ", ilp_inames, - fetch_bounding_box=True, default_tag="for", + fetch_bounding_box=True, default_tag=None, init_expression="0", store_expression="base + buffer") if opt_level == 5: diff --git a/test/test_scan.py b/test/test_scan.py index 101d8fc35f224c02ac6e836cbb49f65b3dd387a4..c2c7e1150aa30a165ea68b12815e9938142289ed 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -165,7 +165,7 @@ def test_dependent_domain_scan(ctx_factory): @pytest.mark.parametrize("i_tag, j_tag", [ - ("for", "for") + (None, None) ]) def test_nested_scan(ctx_factory, i_tag, j_tag): ctx = ctx_factory() @@ -266,7 +266,7 @@ def test_scan_extra_constraints_on_domain(): knl, force_scan=True, force_outer_iname_for_scan="i") -@pytest.mark.parametrize("sweep_iname_tag", ["for", "l.1"]) +@pytest.mark.parametrize("sweep_iname_tag", [None, "l.1"]) def test_scan_with_outer_parallel_iname(ctx_factory, sweep_iname_tag): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -338,7 +338,7 @@ def test_scan_unsupported_tags(): pass -@pytest.mark.parametrize("i_tag", ["for", "l.0"]) +@pytest.mark.parametrize("i_tag", [None, "l.0"]) def test_argmax(ctx_factory, i_tag): logging.basicConfig(level=logging.INFO) @@ -398,7 +398,7 @@ def check_segmented_scan_output(arr, segment_boundaries_indices, out): (3, (0, 2)), (3, (0, 1, 2)), (16, (0, 4, 8, 12))]) -@pytest.mark.parametrize("iname_tag", ("for", "l.0")) +@pytest.mark.parametrize("iname_tag", (None, "l.0")) def test_segmented_scan(ctx_factory, n, segment_boundaries_indices, iname_tag): ctx = ctx_factory() queue = cl.CommandQueue(ctx) diff --git a/test/test_transform.py b/test/test_transform.py index ed184fb50c099d5fb2a6a0941d2f2c22c3b757bc..617022d91eb94241d77bee7b74aa0304c83548d1 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -441,9 +441,9 @@ def test_precompute_with_preexisting_inames(ctx_factory): knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", 
parameters="ii,jj") knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj") - knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for", + knl = lp.precompute(knl, "D1_subst", "i,j", default_tag=None, precompute_inames="ii,jj") - knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for", + knl = lp.precompute(knl, "D2_subst", "i,k", default_tag=None, precompute_inames="ii,jj") knl = lp.prioritize_loops(knl, "ii,jj,e,j,k") @@ -472,10 +472,10 @@ def test_precompute_with_preexisting_inames_fail(): knl = lp.extract_subst(knl, "D1_subst", "D1[ii,jj]", parameters="ii,jj") knl = lp.extract_subst(knl, "D2_subst", "D2[ii,jj]", parameters="ii,jj") - knl = lp.precompute(knl, "D1_subst", "i,j", default_tag="for", + knl = lp.precompute(knl, "D1_subst", "i,j", default_tag=None, precompute_inames="ii,jj") with pytest.raises(lp.LoopyError): - lp.precompute(knl, "D2_subst", "i,k", default_tag="for", + lp.precompute(knl, "D2_subst", "i,k", default_tag=None, precompute_inames="ii,jj")