diff --git a/loopy/__init__.py b/loopy/__init__.py
index 89683e0b466714700f18b090ec365d5861ea4d05..cbef34d5476b140592ec1d396da13bf66d0898a2 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -75,7 +75,7 @@ from loopy.transform.iname import (
         affine_map_inames, find_unused_axis_tag,
         make_reduction_inames_unique,
         has_schedulable_iname_nesting, get_iname_duplication_options,
-        add_inames_to_insn)
+        add_inames_to_insn, make_inames_innermost)
 
 from loopy.transform.instruction import (
         find_instructions, map_instructions,
@@ -185,7 +185,7 @@ __all__ = [
         "affine_map_inames", "find_unused_axis_tag",
         "make_reduction_inames_unique",
         "has_schedulable_iname_nesting", "get_iname_duplication_options",
-        "add_inames_to_insn",
+        "add_inames_to_insn", "make_inames_innermost",
 
         "add_prefetch", "change_arg_to_image",
         "tag_array_axes", "tag_data_axes",
diff --git a/loopy/transform/ilp.py b/loopy/transform/ilp.py
index 0ac71d603ebe8b5150fb854dd3978676dd9d98c3..b648ad50597e424eaa09af15c20a6d46abffeadc 100644
--- a/loopy/transform/ilp.py
+++ b/loopy/transform/ilp.py
@@ -87,13 +87,6 @@ def add_axes_to_temporaries_for_ilp_and_vec(kernel, iname=None):
                             kernel.iname_to_tag.get(iname),
                             (IlpBaseTag, VectorizeTag)))
             else:
-                if not isinstance(
-                        kernel.iname_to_tag.get(iname),
-                        (IlpBaseTag, VectorizeTag)):
-                    raise LoopyError(
-                            "'%s' is not an ILP iname"
-                            % iname)
-
                 ilp_inames = frozenset([iname])
 
             referenced_ilp_inames = (ilp_inames
diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py
index 2347cef3c04d2a44cef91782700e097a20e19712..f5afbedd8412cfbdb38befe236850c3b0b11c228 100644
--- a/loopy/transform/iname.py
+++ b/loopy/transform/iname.py
@@ -1687,4 +1687,64 @@ def add_inames_to_insn(knl, inames, insn_match):
 
 # }}}
 
+# {{{ push iname to innermost
+
+def make_inames_innermost(knl, inames):
+    """Push *inames* to the innermost loop level.
+
+    For each requested iname, :func:`add_axes_to_temporaries_for_ilp_and_vec`
+    is applied and the iname is then duplicated once per instruction, so
+    that every instruction receives its own private copy. All other inames
+    of the kernel are handed to :func:`prioritize_loops`, with the intent
+    that the duplicates end up innermost.
+
+    :arg inames: a comma-separated string of iname names, or a
+        :class:`frozenset`, :class:`set`, :class:`list` or :class:`tuple`
+        of iname names.
+    :returns: the transformed kernel.
+    :raises TypeError: if *inames* has any other type.
+
+    .. versionadded:: 2018.1
+    """
+    if isinstance(inames, str):
+        # Drop empty entries so that "i, j," does not yield a bogus '' iname.
+        inames = [s.strip() for s in inames.split(",") if s.strip()]
+
+    if not isinstance(inames, (frozenset, set, list, tuple)):
+        raise TypeError("'inames' must be a comma-separated string or a "
+                "collection of iname names")
+
+    # Sorted so that generated iname names do not depend on hash ordering.
+    inames = sorted(frozenset(inames))
+
+    if not inames:
+        # Nothing to push; leave the kernel untouched.
+        return knl
+
+    from loopy.transform.ilp import add_axes_to_temporaries_for_ilp_and_vec
+
+    innermost_inames = []
+
+    name_gen = knl.get_var_name_generator()
+    for iname in inames:
+        knl = add_axes_to_temporaries_for_ilp_and_vec(knl, iname)
+        # split inner loops: give each instruction a private copy of iname
+        # NOTE(review): this also duplicates for instructions that may not
+        # be nested inside 'iname' -- confirm that is intended.
+        for insn in knl.instructions:
+            new_iname = name_gen(iname)
+            knl = duplicate_inames(knl, iname, "id:{0}".format(insn.id),
+                    new_inames=[new_iname])
+            innermost_inames.append(new_iname)
+
+    # push newly duplicated inames innermost
+    # The priority is passed as a sorted tuple: a loop priority is an
+    # ordered sequence, and a frozenset would iterate in arbitrary order.
+    other_inames = knl.all_inames() - frozenset(innermost_inames)
+    knl = prioritize_loops(knl, tuple(sorted(other_inames)))
+    return knl
+
+# }}}
+
+
 # vim: foldmethod=marker