From bbe3669296c2ecd9121b37efd90618eb58d92293 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 5 Jun 2016 12:56:06 -0500 Subject: [PATCH] Take accessed vars into account when determining default inames of precompute instructions --- loopy/kernel/tools.py | 86 +++++++++++++++++++++-------------- loopy/transform/precompute.py | 33 ++++++++++++-- 2 files changed, 79 insertions(+), 40 deletions(-) diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 1775032cb..a4e6ab0d6 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -125,7 +125,41 @@ def _add_and_infer_dtypes_overdetermined(knl, dtype_dict): # }}} -# {{{ find_all_insn_inames fixed point iteration +# {{{ find_all_insn_inames fixed point iteration (deprecated) + +def guess_iname_deps_based_on_var_use(kernel, insn, insn_id_to_inames=None): + # For all variables that insn depends on, find the intersection + # of iname deps of all writers, and add those to insn's + # dependencies. + + result = frozenset() + + writer_map = kernel.writer_map() + + for tv_name in (insn.read_dependency_names() & kernel.get_written_variables()): + tv_implicit_inames = None + + for writer_id in writer_map[tv_name]: + writer_insn = kernel.id_to_insn[writer_id] + if insn_id_to_inames is None: + writer_inames = writer_insn.forced_iname_deps + else: + writer_inames = insn_id_to_inames[writer_id] + + writer_implicit_inames = ( + writer_inames + - (writer_insn.write_dependency_names() & kernel.all_inames())) + if tv_implicit_inames is None: + tv_implicit_inames = writer_implicit_inames + else: + tv_implicit_inames = (tv_implicit_inames + & writer_implicit_inames) + + if tv_implicit_inames is not None: + result = result | tv_implicit_inames + + return result - insn.reduction_inames() + def find_all_insn_inames(kernel): logger.debug("%s: find_all_insn_inames: start" % kernel.name) @@ -166,8 +200,6 @@ def find_all_insn_inames(kernel): insn_id_to_inames[insn.id] = iname_deps insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() - written_vars = kernel.get_written_variables() - # fixed point iteration until all iname dep sets have converged # Why is fixed point iteration necessary here? Consider the following @@ -190,38 +222,22 @@ def find_all_insn_inames(kernel): # {{{ depdency-based propagation - # For all variables that insn depends on, find the intersection - # of iname deps of all writers, and add those to insn's - # dependencies. - - for tv_name in (all_read_deps[insn.id] & written_vars): - implicit_inames = None - - for writer_id in writer_map[tv_name]: - writer_implicit_inames = ( - insn_id_to_inames[writer_id] - - insn_assignee_inames[writer_id]) - if implicit_inames is None: - implicit_inames = writer_implicit_inames - else: - implicit_inames = (implicit_inames - & writer_implicit_inames) - - inames_old = insn_id_to_inames[insn.id] - inames_new = (inames_old | implicit_inames) \ - - insn.reduction_inames() - insn_id_to_inames[insn.id] = inames_new - - if inames_new != inames_old: - did_something = True - - warn_with_kernel(kernel, "inferred_iname", - "The iname(s) '%s' on instruction '%s' was " - "automatically added. " - "This is deprecated. Please add the iname " - "to the instruction " - "implicitly, e.g. by adding '{inames=...}" - % (inames_new-inames_old, insn.id)) + inames_old = insn_id_to_inames[insn.id] + inames_new = inames_old | guess_iname_deps_based_on_var_use( + kernel, insn, insn_id_to_inames) + + insn_id_to_inames[insn.id] = inames_new + + if inames_new != inames_old: + did_something = True + + warn_with_kernel(kernel, "inferred_iname", + "The iname(s) '%s' on instruction '%s' was " + "automatically added. " + "This is deprecated. Please add the iname " + "to the instruction " + "implicitly, e.g. by adding '{inames=...}" + % (inames_new-inames_old, insn.id)) # }}} diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index f9d71b9f1..ce54bb54c 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -239,6 +239,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): def precompute(kernel, subst_use, sweep_inames=[], within=None, storage_axes=None, temporary_name=None, precompute_inames=None, + precompute_outer_inames=None, storage_axis_to_tag={}, default_tag="l.auto", dtype=None, fetch_bounding_box=False, temporary_scope=None, temporary_is_local=None, @@ -307,6 +308,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, tuple, in which case names will be automatically created. May also equivalently be a comma-separated string. + :arg precompute_outer_inames: The inames within which the compute + instruction is nested. If *None*, guess from dependencies. + :arg compute_insn_id: The ID of the instruction performing the precomputation. If `storage_axes` is not specified, it defaults to the arrangement @@ -766,11 +770,7 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, id=compute_insn_id, assignee=assignee, expression=compute_expression, - forced_iname_deps=( - frozenset(non1_storage_axis_names) - | frozenset( - (expanding_usage_arg_deps | value_inames) - - sweep_inames_set)) + # forced_iname_deps determined below ) # }}} @@ -791,6 +791,29 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} + # {{{ determine inames for compute insn + + if precompute_outer_inames is None: + from loopy.kernel.tools import guess_iname_deps_based_on_var_use + precompute_outer_inames = ( + frozenset(non1_storage_axis_names) + | frozenset( + (expanding_usage_arg_deps | value_inames) + - sweep_inames_set) + | guess_iname_deps_based_on_var_use(kernel, compute_insn)) + else: + if not isinstance(precompute_outer_inames, frozenset): + raise TypeError("precompute_outer_inames must be a frozenset") + + kernel = kernel.copy( + instructions=[ + insn.copy(forced_iname_deps=precompute_outer_inames) + if insn.id == compute_insn_id + else insn + for insn in kernel.instructions]) + + # }}} + # {{{ set up temp variable import loopy as lp -- GitLab