From 84163f0ca3763551d521569a44c628b9af65db02 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Tue, 12 Jul 2016 19:08:14 +0200 Subject: [PATCH 01/24] Implement a generator for duplication options in case of non-schedulable kernels Some kernels require the duplication of an iname to be logically schedulable. This commit introduces an algorithm that lists such options. It also provides a function to check whether such transformation is needed. --- loopy/__init__.py | 3 +- loopy/transform/iname.py | 80 ++++++++++++++++++++++++++++++++++++++++ test/test_loopy.py | 19 ++++++++++ 3 files changed, 101 insertions(+), 1 deletion(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 50660f60a..d4afd2b7b 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -65,7 +65,8 @@ from loopy.transform.iname import ( rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, affine_map_inames, find_unused_axis_tag, - make_reduction_inames_unique) + make_reduction_inames_unique, + needs_iname_duplication, get_iname_duplication_options) from loopy.transform.instruction import ( find_instructions, map_instructions, diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4c3cd0a69..09926e7bd 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -796,6 +796,86 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # }}} +# {{{ iname duplication for schedulability + +def _get_iname_duplication_options(insn_deps): + # Remove common inames of the current insn_deps, as they are not relevant for splitting. + def remove_common_iname(deps): + common = frozenset({}).union(*insn_deps).intersection(*insn_deps) + if common: + iname = iter(common).next() + return frozenset(d - frozenset([iname]) for d in deps) - frozenset([frozenset([])]), False + else: + return deps, True + + stop = False + while not stop: + insn_deps, stop = remove_common_iname(insn_deps) + + # Try finding a partitioning of the remaining inames, such that all instructions + # use only inames from one of the disjoint sets from the partitioning. + def join_sets_if_not_disjoint(sets): + for s1 in sets: + for s2 in sets: + if s1 != s2 and s1.intersection(s2): + return (sets - frozenset([s1, s2])).union(frozenset([s1.union(s2)])), False + + return sets, True + + partitioning = insn_deps + stop = False + while not stop: + partitioning, stop = join_sets_if_not_disjoint(partitioning) + + # If a partitioning was found we recursively apply this algorithm to the subproblems + if len(partitioning) > 1: + for part in partitioning: + working_set = frozenset(s for s in insn_deps if s.issubset(part)) + for option in _get_iname_duplication_options(working_set): + yield option + # If exactly one set was found, an iname duplication is necessary + elif len(partitioning) == 1: + inames, = partitioning + + # There are splitting options for all inames + for iname in inames: + iname_insns = frozenset(insn for insn in insn_deps if frozenset([iname]).issubset(insn)) + + import itertools as it + # For a given iname, the set of instructions containing this iname is inspected. + # For each element of the power set without the empty and the full set, one + # duplication option is generated. + for insns_to_dup in it.chain.from_iterable(it.combinations(iname_insns, l) for l in range(1, len(iname_insns))): + yield iname, insns_to_dup + + # If partitioning was empty, we have recursed successfully and yield nothing + + +def get_iname_duplication_options(knl): + """ + returns all options to duplicate inames, if duplication of an iname is necessary + to ensure the schedulability of the kernel. duplication options are returned as + tuples (iname, within) as understood by loopy.duplicate_inames + """ + # First we extract the minimal necessary information from the kernel + insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) + + # Get the duplication options as a tuple of iname and a set + for iname, insns in _get_iname_duplication_options(insn_deps): + # Reconstruct an object that may be passed to the within parameter of + # loopy.duplicate_inames + within = ' or '.join('id:%s' % insn.id for insn in knl.instructions if insn.forced_iname_deps in insns) + yield iname, within + + +def needs_iname_duplication(knl): + """ + returns a bool indicating whether this kernel needs an iname duplication + in order to be schedulable. + """ + return bool(next(get_iname_duplication_options(knl), False)) + +# }}} # {{{ rename_inames diff --git a/test/test_loopy.py b/test/test_loopy.py index 6afd913de..fa836af97 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1237,6 +1237,25 @@ def test_finite_difference_expr_subst(ctx_factory): evt, _ = precomp_knl(queue, u=u, h=h) +def test_unschedulable_kernel_detection(): + knl = lp.make_kernel(["{[i,j]:0<=i,j 1: exec(sys.argv[1]) -- GitLab From 45113adc7a2fbc368b517ae25a01fea5c987f505 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Wed, 13 Jul 2016 14:33:47 +0200 Subject: [PATCH 02/24] Simplify removal of common inames All common inames can be removed in one go, no need to do it one by one. --- loopy/transform/iname.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 09926e7bd..8d2dad4ab 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -800,17 +800,8 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, def _get_iname_duplication_options(insn_deps): # Remove common inames of the current insn_deps, as they are not relevant for splitting. - def remove_common_iname(deps): - common = frozenset({}).union(*insn_deps).intersection(*insn_deps) - if common: - iname = iter(common).next() - return frozenset(d - frozenset([iname]) for d in deps) - frozenset([frozenset([])]), False - else: - return deps, True - - stop = False - while not stop: - insn_deps, stop = remove_common_iname(insn_deps) + common = frozenset([]).union(*insn_deps).intersection(*insn_deps) + insn_deps = frozenset(dep - common for dep in insn_deps) - frozenset([frozenset([])]) # Try finding a partitioning of the remaining inames, such that all instructions # use only inames from one of the disjoint sets from the partitioning. -- GitLab From 1cd034c49246423cd7ac3a05d97437063fba72ea Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Wed, 13 Jul 2016 15:16:13 +0200 Subject: [PATCH 03/24] [bugfix] Reintroduce common inames in the result --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8d2dad4ab..7bea9119c 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -837,7 +837,7 @@ def _get_iname_duplication_options(insn_deps): # For each element of the power set without the empty and the full set, one # duplication option is generated. for insns_to_dup in it.chain.from_iterable(it.combinations(iname_insns, l) for l in range(1, len(iname_insns))): - yield iname, insns_to_dup + yield iname, tuple(insn.union(common) for insn in insns_to_dup) # If partitioning was empty, we have recursed successfully and yield nothing -- GitLab From 5afe7de9ab9bb76a843ff2fc783c8945eea809ae Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Wed, 13 Jul 2016 15:16:59 +0200 Subject: [PATCH 04/24] Add additional test for duplication options --- test/test_loopy.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index fa836af97..e8de0e6f0 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1255,6 +1255,17 @@ def test_unschedulable_kernel_detection(): fixed_knl = lp.duplicate_inames(knl, inames, insns) assert not lp.needs_iname_duplication(fixed_knl) + knl = lp.make_kernel(["{[i,j,k,l,m]:0<=i,j,k,l,m 1: -- GitLab From 0a426c353798e30f9164a73f6bc5f50a0ab8e649 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 25 Jul 2016 18:29:14 +0200 Subject: [PATCH 05/24] [bugfix] Carry all common inames through recursion correctly --- loopy/transform/iname.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7bea9119c..8d9af4624 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -798,10 +798,12 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # {{{ iname duplication for schedulability -def _get_iname_duplication_options(insn_deps): +def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # Remove common inames of the current insn_deps, as they are not relevant for splitting. common = frozenset([]).union(*insn_deps).intersection(*insn_deps) insn_deps = frozenset(dep - common for dep in insn_deps) - frozenset([frozenset([])]) + # Join the common inames with those found in recursion + common = common.union(old_common_inames) # Try finding a partitioning of the remaining inames, such that all instructions # use only inames from one of the disjoint sets from the partitioning. @@ -822,7 +824,7 @@ def _get_iname_duplication_options(insn_deps): if len(partitioning) > 1: for part in partitioning: working_set = frozenset(s for s in insn_deps if s.issubset(part)) - for option in _get_iname_duplication_options(working_set): + for option in _get_iname_duplication_options(working_set, common): yield option # If exactly one set was found, an iname duplication is necessary elif len(partitioning) == 1: -- GitLab From 9c83b608d95791899e7f894c13edd32d1ba1235f Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Wed, 27 Jul 2016 12:01:23 +0200 Subject: [PATCH 06/24] Work in feedback from code review --- loopy/transform/iname.py | 39 +++++++++++++++++++++++++++++++++------ test/test_loopy.py | 4 ++-- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8d9af4624..4c7b91d4f 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -48,6 +48,10 @@ __doc__ = """ .. autofunction:: duplicate_inames +.. autofunction:: get_iname_duplication_options + +.. autofunction:: needs_iname_duplication + .. autofunction:: rename_iname .. autofunction:: remove_unused_inames @@ -845,10 +849,32 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): def get_iname_duplication_options(knl): - """ - returns all options to duplicate inames, if duplication of an iname is necessary - to ensure the schedulability of the kernel. duplication options are returned as - tuples (iname, within) as understood by loopy.duplicate_inames + """List options for duplication of inames, if necessary for schedulability + + :returns: a generator listing all options to duplicate inames, if duplication of an iname is necessary + to ensure the schedulability of the kernel. Duplication options are returned as + tuples (iname, within) as understood by :func:`duplicate_inames`. There is no guarantee, + that the transformed kernel will be schedulable, because multiple duplications of iname + may be necessary. + + Some kernels require the duplication of inames in order to be schedulable, as the + forced iname dependencies define an over-determined problem to the scheduler. + Consider the following minimal example: + + knl = lp.make_kernel(["{[i,j]:0<=i,j Date: Wed, 27 Jul 2016 12:48:11 -0500 Subject: [PATCH 07/24] PEP8 fixes --- loopy/transform/iname.py | 54 ++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4c7b91d4f..c68f46a44 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -800,12 +800,18 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, # }}} + # {{{ iname duplication for schedulability def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): - # Remove common inames of the current insn_deps, as they are not relevant for splitting. + # Remove common inames of the current insn_deps, as they are not relevant + # for splitting. common = frozenset([]).union(*insn_deps).intersection(*insn_deps) - insn_deps = frozenset(dep - common for dep in insn_deps) - frozenset([frozenset([])]) + insn_deps = ( + frozenset(dep - common for dep in insn_deps) + - + frozenset([frozenset([])])) + # Join the common inames with those found in recursion common = common.union(old_common_inames) @@ -815,7 +821,10 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): for s1 in sets: for s2 in sets: if s1 != s2 and s1.intersection(s2): - return (sets - frozenset([s1, s2])).union(frozenset([s1.union(s2)])), False + return ( + (sets - frozenset([s1, s2])) + | frozenset([s1 | s2]) + ), False return sets, True @@ -824,7 +833,8 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): while not stop: partitioning, stop = join_sets_if_not_disjoint(partitioning) - # If a partitioning was found we recursively apply this algorithm to the subproblems + # If a partitioning was found we recursively apply this algorithm to the + # subproblems if len(partitioning) > 1: for part in partitioning: working_set = frozenset(s for s in insn_deps if s.issubset(part)) @@ -836,13 +846,16 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # There are splitting options for all inames for iname in inames: - iname_insns = frozenset(insn for insn in insn_deps if frozenset([iname]).issubset(insn)) + iname_insns = frozenset( + insn for insn in insn_deps if frozenset([iname]).issubset(insn)) import itertools as it - # For a given iname, the set of instructions containing this iname is inspected. - # For each element of the power set without the empty and the full set, one - # duplication option is generated. - for insns_to_dup in it.chain.from_iterable(it.combinations(iname_insns, l) for l in range(1, len(iname_insns))): + # For a given iname, the set of instructions containing this iname + # is inspected. For each element of the power set without the + # empty and the full set, one duplication option is generated. + for insns_to_dup in it.chain.from_iterable( + it.combinations(iname_insns, l) + for l in range(1, len(iname_insns))): yield iname, tuple(insn.union(common) for insn in insns_to_dup) # If partitioning was empty, we have recursed successfully and yield nothing @@ -851,11 +864,12 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): def get_iname_duplication_options(knl): """List options for duplication of inames, if necessary for schedulability - :returns: a generator listing all options to duplicate inames, if duplication of an iname is necessary - to ensure the schedulability of the kernel. Duplication options are returned as - tuples (iname, within) as understood by :func:`duplicate_inames`. There is no guarantee, - that the transformed kernel will be schedulable, because multiple duplications of iname - may be necessary. + :returns: a generator listing all options to duplicate inames, if duplication + of an iname is necessary to ensure the schedulability of the kernel. + Duplication options are returned as tuples (iname, within) as + understood by :func:`duplicate_inames`. There is no guarantee, that the + transformed kernel will be schedulable, because multiple duplications + of iname may be necessary. Some kernels require the duplication of inames in order to be schedulable, as the forced iname dependencies define an over-determined problem to the scheduler. @@ -874,7 +888,8 @@ def get_iname_duplication_options(knl): * duplicating j in instruction i2 * duplicating i in instruction i2 and i3 - Use :func:`needs_iname_duplication` to decide, whether an iname needs to be duplicated in a given kernel. + Use :func:`needs_iname_duplication` to decide, whether an iname needs to be + duplicated in a given kernel. """ # First we extract the minimal necessary information from the kernel insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) @@ -884,19 +899,22 @@ def get_iname_duplication_options(knl): # Reconstruct an object that may be passed to the within parameter of # loopy.duplicate_inames from loopy.match import Id, Or - within = Or(list(Id(insn.id) for insn in knl.instructions if insn.forced_iname_deps in insns)) + within = Or(tuple( + Id(insn.id) for insn in knl.instructions + if insn.forced_iname_deps in insns)) yield iname, within def needs_iname_duplication(knl): """ - :returns: a :class:`bool` indicating whether this kernel needs an iname duplication - in order to be schedulable. + :returns: a :class:`bool` indicating whether this kernel needs + an iname duplication in order to be schedulable. """ return bool(next(get_iname_duplication_options(knl), False)) # }}} + # {{{ rename_inames def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): -- GitLab From 5645e1b9ca60d6cb1d9f9bc96a2eeab1e967eb41 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 Jul 2016 13:42:49 -0500 Subject: [PATCH 08/24] Minor doc tweak --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index c68f46a44..5d35e07c4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -882,7 +882,7 @@ def get_iname_duplication_options(knl): mat3[i] = mat3[i] + 1 {inames=i, id=i3} \"\"\") - In the example, there are four possibilities to resolve the problem + In the example, there are four possibilities to resolve the problem: * duplicating i in instruction i3 * duplicating i in instruction i1 and i3 * duplicating j in instruction i2 -- GitLab From 28aa43058900625b87c98848882354ba93e90510 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Thu, 28 Jul 2016 08:50:21 +0200 Subject: [PATCH 09/24] needs_iname_duplication -> has_schedulable_iname_nesting --- loopy/__init__.py | 2 +- loopy/transform/iname.py | 8 ++++---- test/test_loopy.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index 7f2d151e1..3daa00899 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -66,7 +66,7 @@ from loopy.transform.iname import ( split_reduction_inward, split_reduction_outward, affine_map_inames, find_unused_axis_tag, make_reduction_inames_unique, - needs_iname_duplication, get_iname_duplication_options) + has_schedulable_iname_nesting, get_iname_duplication_options) from loopy.transform.instruction import ( find_instructions, map_instructions, diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 5d35e07c4..8930e6df4 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -50,7 +50,7 @@ __doc__ = """ .. autofunction:: get_iname_duplication_options -.. autofunction:: needs_iname_duplication +.. autofunction:: has_schedulable_iname_nesting .. autofunction:: rename_iname @@ -888,7 +888,7 @@ def get_iname_duplication_options(knl): * duplicating j in instruction i2 * duplicating i in instruction i2 and i3 - Use :func:`needs_iname_duplication` to decide, whether an iname needs to be + Use :func:`has_schedulable_iname_nesting` to decide, whether an iname needs to be duplicated in a given kernel. """ # First we extract the minimal necessary information from the kernel @@ -905,12 +905,12 @@ def get_iname_duplication_options(knl): yield iname, within -def needs_iname_duplication(knl): +def has_schedulable_iname_nesting(knl): """ :returns: a :class:`bool` indicating whether this kernel needs an iname duplication in order to be schedulable. """ - return bool(next(get_iname_duplication_options(knl), False)) + return not bool(next(get_iname_duplication_options(knl), False)) # }}} diff --git a/test/test_loopy.py b/test/test_loopy.py index b06dc6ecb..078e89ff4 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1291,12 +1291,12 @@ def test_unschedulable_kernel_detection(): knl = lp.preprocess_kernel(knl) # Check that loopy can detect the unschedulability of the kernel - assert lp.needs_iname_duplication(knl) + assert not lp.has_schedulable_iname_nesting(knl) assert len(list(lp.get_iname_duplication_options(knl))) == 4 for inames, insns in lp.get_iname_duplication_options(knl): fixed_knl = lp.duplicate_inames(knl, inames, insns) - assert not lp.needs_iname_duplication(fixed_knl) + assert lp.has_schedulable_iname_nesting(fixed_knl) knl = lp.make_kernel(["{[i,j,k,l,m]:0<=i,j,k,l,m Date: Thu, 28 Jul 2016 09:15:49 +0200 Subject: [PATCH 10/24] Discard any parallel inames from the duplication options algorithm --- loopy/transform/iname.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8930e6df4..59176f667 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -896,6 +896,12 @@ def get_iname_duplication_options(knl): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): + # Check whether this iname has a parallel tag and discard it if so + from loopy.kernel.data import ParallelTag + if (iname in knl.iname_to_tag + and isinstance(knl.iname_to_tag[iname], ParallelTag)): + continue + # Reconstruct an object that may be passed to the within parameter of # loopy.duplicate_inames from loopy.match import Id, Or -- GitLab From a66fce04fee2d1721f8750fd4ddf91c4004142fe Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 13:28:58 +0200 Subject: [PATCH 11/24] Add a preschedule check for schedulable iname nesting! ... with actionable error message. --- loopy/check.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/loopy/check.py b/loopy/check.py index 12f2366c2..ac862dbbd 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -303,6 +303,24 @@ def check_write_destinations(kernel): or wvar in kernel.arg_dict) and wvar not in kernel.all_params(): raise LoopyError + +def check_has_schedulable_iname_nesting(kernel): + from loopy.transform.iname import (has_schedulable_iname_nesting, + get_iname_duplication_options) + if not has_schedulable_iname_nesting(kernel): + opt = get_iname_duplication_options(kernel) + import itertools as it + raise LoopyError("Kernel does not have a schedulable iname nesting. " + "In order for there to exist a feasible loop nesting, you " + "may need to duplicate an iname. To do so, call " + "loopy.duplicate_iname. Use loopy.get_iname_duplication_options " + "to get hints about which iname to duplicate. Here are some " + "options: " + "\n* Duplicate %s within instructions %s" + "\n* Duplicate %s within instructions %s" + "\n* Duplicate %s within instructions %s" + % tuple(it.chain(*tuple(it.islice(opt, 3))))) + # }}} @@ -319,6 +337,7 @@ def pre_schedule_checks(kernel): check_for_data_dependent_parallel_bounds(kernel) check_bounds(kernel) check_write_destinations(kernel) + check_has_schedulable_iname_nesting(kernel) logger.info("pre-schedule check %s: done" % kernel.name) except KeyboardInterrupt: -- GitLab From ffe24c1bea6e0f6663b49c3decec3be2cd57112f Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 13:35:14 +0200 Subject: [PATCH 12/24] Add new functions to __all__ in __init__.py --- loopy/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/loopy/__init__.py b/loopy/__init__.py index 3daa00899..778b3d49e 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -170,6 +170,7 @@ __all__ = [ "split_reduction_inward", "split_reduction_outward", "affine_map_inames", "find_unused_axis_tag", "make_reduction_inames_unique", + "has_schedulable_iname_nesting", "get_iname_duplication_options", "add_prefetch", "change_arg_to_image", "tag_array_axes", "tag_data_axes", -- GitLab From 0260cc7bf865949f868c6a74850448829f101564 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 14:00:23 +0200 Subject: [PATCH 13/24] Make generation of hints statement robust It previously failed if less than 3 options were given by the generator. --- loopy/check.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index ac862dbbd..76b86a7c3 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -308,18 +308,16 @@ def check_has_schedulable_iname_nesting(kernel): from loopy.transform.iname import (has_schedulable_iname_nesting, get_iname_duplication_options) if not has_schedulable_iname_nesting(kernel): - opt = get_iname_duplication_options(kernel) import itertools as it + opt = get_iname_duplication_options(kernel) + opt_str = "\n".join("* Duplicate %s within instructions %s" % (i, w) + for i, w in it.islice(opt, 3)) raise LoopyError("Kernel does not have a schedulable iname nesting. " "In order for there to exist a feasible loop nesting, you " "may need to duplicate an iname. To do so, call " "loopy.duplicate_iname. Use loopy.get_iname_duplication_options " "to get hints about which iname to duplicate. Here are some " - "options: " - "\n* Duplicate %s within instructions %s" - "\n* Duplicate %s within instructions %s" - "\n* Duplicate %s within instructions %s" - % tuple(it.chain(*tuple(it.islice(opt, 3))))) + "options:\n%s" % opt_str) # }}} -- GitLab From ad35d2dd2b1226c27e1a4acbba8a250a12c30ad0 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 16:55:57 +0200 Subject: [PATCH 14/24] Correctly go into recursion when having common inames --- loopy/transform/iname.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 59176f667..35bffa665 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -807,13 +807,22 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # Remove common inames of the current insn_deps, as they are not relevant # for splitting. common = frozenset([]).union(*insn_deps).intersection(*insn_deps) - insn_deps = ( + + # If common inames were found, we reduce the problem and go into recursion + if common: + # Remove the common inames from the instruction dependencies + insn_deps = ( frozenset(dep - common for dep in insn_deps) - frozenset([frozenset([])])) + # Join the common inames with those previously found + common = common.union(old_common_inames) - # Join the common inames with those found in recursion - common = common.union(old_common_inames) + # Go into recursion + for option in _get_iname_duplication_options(insn_deps, old_common_inames=common): + yield option + # Do not yield anything beyond here! + return # Try finding a partitioning of the remaining inames, such that all instructions # use only inames from one of the disjoint sets from the partitioning. @@ -838,7 +847,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): if len(partitioning) > 1: for part in partitioning: working_set = frozenset(s for s in insn_deps if s.issubset(part)) - for option in _get_iname_duplication_options(working_set, common): + for option in _get_iname_duplication_options(working_set, old_common_inames): yield option # If exactly one set was found, an iname duplication is necessary elif len(partitioning) == 1: @@ -856,7 +865,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): for insns_to_dup in it.chain.from_iterable( it.combinations(iname_insns, l) for l in range(1, len(iname_insns))): - yield iname, tuple(insn.union(common) for insn in insns_to_dup) + yield iname, tuple(insn.union(old_common_inames) for insn in insns_to_dup) # If partitioning was empty, we have recursed successfully and yield nothing -- GitLab From c479407135482ebc20e0f4b53823cc3dde042120 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 16:56:17 +0200 Subject: [PATCH 15/24] Exclude instructions without iname dependencies in dependency extraction --- loopy/transform/iname.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 35bffa665..adaf723ae 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -902,6 +902,8 @@ def get_iname_duplication_options(knl): """ # First we extract the minimal necessary information from the kernel insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) + - + frozenset([frozenset([])]) # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): -- GitLab From 35e64eee300a05859e05fdec844eb420ced924f5 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 1 Aug 2016 17:05:46 +0200 Subject: [PATCH 16/24] pep8 ups... --- loopy/transform/iname.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index adaf723ae..1930eeedc 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -819,7 +819,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): common = common.union(old_common_inames) # Go into recursion - for option in _get_iname_duplication_options(insn_deps, old_common_inames=common): + for option in _get_iname_duplication_options(insn_deps, common): yield option # Do not yield anything beyond here! return @@ -847,7 +847,8 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): if len(partitioning) > 1: for part in partitioning: working_set = frozenset(s for s in insn_deps if s.issubset(part)) - for option in _get_iname_duplication_options(working_set, old_common_inames): + for option in _get_iname_duplication_options(working_set, + old_common_inames): yield option # If exactly one set was found, an iname duplication is necessary elif len(partitioning) == 1: @@ -865,7 +866,9 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): for insns_to_dup in it.chain.from_iterable( it.combinations(iname_insns, l) for l in range(1, len(iname_insns))): - yield iname, tuple(insn.union(old_common_inames) for insn in insns_to_dup) + yield ( + iname, + tuple(insn.union(old_common_inames) for insn in insns_to_dup)) # If partitioning was empty, we have recursed successfully and yield nothing @@ -901,9 +904,10 @@ def get_iname_duplication_options(knl): duplicated in a given kernel. """ # First we extract the minimal necessary information from the kernel - insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) - - - frozenset([frozenset([])]) + insn_deps = ( + frozenset(insn.forced_iname_deps for insn in knl.instructions) + - + frozenset([frozenset([])])) # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): -- GitLab From d26918f434b9b4ad7cc9c873aa20ad536639ec5c Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Mon, 8 Aug 2016 08:52:20 +0200 Subject: [PATCH 17/24] Add 'boostable_into' field to insn deps --- loopy/transform/iname.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 1930eeedc..4f00fa70b 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -905,7 +905,10 @@ def get_iname_duplication_options(knl): """ # First we extract the minimal necessary information from the kernel insn_deps = ( - frozenset(insn.forced_iname_deps for insn in knl.instructions) + frozenset(insn.forced_iname_deps.union( + insn.boostable_into if insn.boostable_into is not None + else frozenset([])) + for insn in knl.instructions) - frozenset([frozenset([])])) -- GitLab From 13a497684322be939247c9092f6f36644f153949 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Fri, 19 Aug 2016 13:15:00 +0200 Subject: [PATCH 18/24] Use boostable_into information only after iname duplication was deemed necessary without. --- loopy/transform/iname.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 4f00fa70b..7ecd1abc7 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -873,7 +873,7 @@ def _get_iname_duplication_options(insn_deps, old_common_inames=frozenset([])): # If partitioning was empty, we have recursed successfully and yield nothing -def get_iname_duplication_options(knl): +def get_iname_duplication_options(knl, use_boostable_into=False): """List options for duplication of inames, if necessary for schedulability :returns: a generator listing all options to duplicate inames, if duplication @@ -904,16 +904,26 @@ def get_iname_duplication_options(knl): duplicated in a given kernel. """ # First we extract the minimal necessary information from the kernel - insn_deps = ( - frozenset(insn.forced_iname_deps.union( - insn.boostable_into if insn.boostable_into is not None - else frozenset([])) - for insn in knl.instructions) - - - frozenset([frozenset([])])) + if use_boostable_into: + insn_deps = ( + frozenset(insn.forced_iname_deps.union( + insn.boostable_into if insn.boostable_into is not None + else frozenset([])) + for insn in knl.instructions) + - + frozenset([frozenset([])])) + else: + insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): + # If we find a duplication option and fo not use boostable_into + # information, we restart this generator with use_boostable_into=True + if not use_boostable_into: + for option in get_iname_duplication_options(knl, True): + yield option + return + # Check whether this iname has a parallel tag and discard it if so from loopy.kernel.data import ParallelTag if (iname in knl.iname_to_tag -- GitLab From 45bd98f8ae8cd8e752783f88a586dd3055056da1 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Fri, 19 Aug 2016 13:33:29 +0200 Subject: [PATCH 19/24] Remove empty set from insn_deps (again) --- loopy/transform/iname.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 7ecd1abc7..8e6f57cc8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -913,7 +913,10 @@ def get_iname_duplication_options(knl, use_boostable_into=False): - frozenset([frozenset([])])) else: - insn_deps = frozenset(insn.forced_iname_deps for insn in knl.instructions) + insn_deps = ( + frozenset(insn.forced_iname_deps for insn in knl.instructions) + - + frozenset([frozenset([])])) # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): -- GitLab From 35241a0a61e47cfb8572efcd79861587f77e2964 Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Fri, 19 Aug 2016 16:49:19 +0200 Subject: [PATCH 20/24] First discard parallel inames, then try use_boostable_into --- loopy/transform/iname.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8e6f57cc8..53c4a0575 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -920,6 +920,12 @@ def get_iname_duplication_options(knl, use_boostable_into=False): # Get the duplication options as a tuple of iname and a set for iname, insns in _get_iname_duplication_options(insn_deps): + # Check whether this iname has a parallel tag and discard it if so + from loopy.kernel.data import ParallelTag + if (iname in knl.iname_to_tag + and isinstance(knl.iname_to_tag[iname], ParallelTag)): + continue + # If we find a duplication option and fo not use boostable_into # information, we restart this generator with use_boostable_into=True if not use_boostable_into: @@ -927,12 +933,6 @@ def get_iname_duplication_options(knl, use_boostable_into=False): yield option return - # Check whether this iname has a parallel tag and discard it if so - from loopy.kernel.data import ParallelTag - if (iname in knl.iname_to_tag - and isinstance(knl.iname_to_tag[iname], ParallelTag)): - continue - # Reconstruct an object that may be passed to the within parameter of # loopy.duplicate_inames from loopy.match import Id, Or -- GitLab From 56001eae5c67fd695c3f1b00a276e5c5d58ce97f Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Fri, 19 Aug 2016 17:31:45 +0200 Subject: [PATCH 21/24] Discard duplication options that do not affect an instruction --- loopy/transform/iname.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 53c4a0575..46cb1f0f2 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -939,7 +939,11 @@ def get_iname_duplication_options(knl, use_boostable_into=False): within = Or(tuple( Id(insn.id) for insn in knl.instructions if insn.forced_iname_deps in insns)) - yield iname, within + + # Only yield the result if an instruction matched. With use_boostable_into=True + # this is not always true. + if within.children: + yield iname, within def has_schedulable_iname_nesting(knl): -- GitLab From f1e4852c11919d2c7ad161d6262d330f8a6742bc Mon Sep 17 00:00:00 2001 From: Dominic Kempf Date: Fri, 19 Aug 2016 17:35:40 +0200 Subject: [PATCH 22/24] Emit a warning if falling into boostable_into branch --- loopy/transform/iname.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 46cb1f0f2..3a9941fa8 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -931,6 +931,15 @@ def get_iname_duplication_options(knl, use_boostable_into=False): if not use_boostable_into: for option in get_iname_duplication_options(knl, True): yield option + + # Emit a warning that we needed boostable_into + from warnings import warn + from loopy.diagnostic import LoopyWarning + warn("Kernel '%s' required the deprecated 'boostable_into" + "field in order to be schedulable!" % knl.name, LoopyWarning) + + # Return to avoid yielding the duplication + # options without boostable_into return # Reconstruct an object that may be passed to the within parameter of -- GitLab From fc14cb71fea3a0a07e989db376dceb53f24111f8 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 16 Sep 2016 10:31:14 -0500 Subject: [PATCH 23/24] Restore library_for_test (oops) (!2) --- test/library_for_test.py | 22 ++++++++++++++++++++++ test/test_loopy.py | 27 +++------------------------ 2 files changed, 25 insertions(+), 24 deletions(-) create mode 100644 test/library_for_test.py diff --git a/test/library_for_test.py b/test/library_for_test.py new file mode 100644 index 000000000..b2de0d402 --- /dev/null +++ b/test/library_for_test.py @@ -0,0 +1,22 @@ +# This exists because function handles can't be pickled. + +def no_ret_f_mangler(kernel, name, arg_dtypes): + if not isinstance(name, str): + return None + + if (name == "f" and len(arg_dtypes) == 0): + from loopy.kernel.data import CallMangleInfo + return CallMangleInfo( + target_name="f", + result_dtypes=arg_dtypes, + arg_dtypes=arg_dtypes) + + +def no_ret_f_preamble_gen(preamble_info): + yield ("10_define_f", + r""" + void f() + { + printf("Hi!\n"); + } + """) diff --git a/test/test_loopy.py b/test/test_loopy.py index dfc50fca0..cd6305e57 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -1282,28 +1282,6 @@ def test_finite_difference_expr_subst(ctx_factory): # {{{ call without returned values -def _f_mangler(kernel, name, arg_dtypes): - if not isinstance(name, str): - return None - - if (name == "f" and len(arg_dtypes) == 0): - from loopy.kernel.data import CallMangleInfo - return CallMangleInfo( - target_name="f", - result_dtypes=arg_dtypes, - arg_dtypes=arg_dtypes) - - -def _f_preamble_gen(preamble_info): - yield ("10_define_f", - r""" - void f() - { - printf("Hi!\n"); - } - """) - - def test_call_with_no_returned_value(ctx_factory): import pymbolic.primitives as p @@ -1315,8 +1293,9 @@ def test_call_with_no_returned_value(ctx_factory): [lp.CallInstruction((), p.Call(p.Variable("f"), ()))] ) - knl = lp.register_function_manglers(knl, [_f_mangler]) - knl = lp.register_preamble_generators(knl, [_f_preamble_gen]) + from library_for_test import no_ret_f_mangler, no_ret_f_preamble_gen + knl = lp.register_function_manglers(knl, [no_ret_f_mangler]) + knl = lp.register_preamble_generators(knl, [no_ret_f_preamble_gen]) evt, _ = knl(queue) -- GitLab From 2edb8713eaf4afb54ff167f36006ea306767444c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 21 Sep 2016 19:25:59 -0500 Subject: [PATCH 24/24] Fix SEM Laplacian test with detect-fix-unschedulable --- test/test_sem_reagan.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_sem_reagan.py b/test/test_sem_reagan.py index f4b91b236..0571e4191 100644 --- a/test/test_sem_reagan.py +++ b/test/test_sem_reagan.py @@ -45,10 +45,10 @@ def test_tim2d(ctx_factory): # K - run-time symbolic knl = lp.make_kernel( - "[K] -> {[i,j,e,m,o,gi]: 0<=i,j,m,o<%d and 0<=e