From ded9342c7b96bf5bf6e28c7b8f0bbe4eb11ac00b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sun, 5 Jun 2016 01:43:33 -0500 Subject: [PATCH] Deprecate iname inference. Do iname inference once up front and never again, fix transformations to work without it, and complain if iname inference actually does anything --- loopy/kernel/__init__.py | 14 +++++++------- loopy/kernel/creation.py | 27 +++++++++++++++++++++++++++ loopy/kernel/data.py | 18 ++---------------- loopy/kernel/tools.py | 26 ++++++++++++++++++-------- loopy/preprocess.py | 12 ++---------- loopy/transform/batch.py | 10 +++++++++- loopy/transform/buffer.py | 9 +++++++-- loopy/transform/diff.py | 4 +++- loopy/transform/iname.py | 25 ++++++++++++++++++++++--- loopy/transform/precompute.py | 17 ++++++++++++----- loopy/version.py | 2 +- 11 files changed, 110 insertions(+), 54 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index f9049ee43..3d64d227e 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -669,9 +669,11 @@ class LoopKernel(RecordWithoutPickling): """Return a mapping from instruction ids to inames inside which they should be run. 
""" + result = {} + for insn in self.instructions: + result[insn.id] = insn.forced_iname_deps - from loopy.kernel.tools import find_all_insn_inames - return find_all_insn_inames(self) + return result @memoize_method def all_referenced_inames(self): @@ -681,11 +683,9 @@ class LoopKernel(RecordWithoutPickling): return result def insn_inames(self, insn): - from loopy.kernel.data import InstructionBase - if isinstance(insn, InstructionBase): - return self.all_insn_inames()[insn.id] - else: - return self.all_insn_inames()[insn] + if isinstance(insn, str): + insn = self.id_to_insn[insn] + return insn.forced_iname_deps @memoize_method def iname_to_insns(self): diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 1c988e34d..ec3b8b6ef 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1084,6 +1084,19 @@ def resolve_wildcard_deps(knl): # }}} +# {{{ add inferred iname deps + +def add_inferred_inames(knl): + from loopy.kernel.tools import find_all_insn_inames + insn_inames = find_all_insn_inames(knl) + + return knl.copy(instructions=[ + insn.copy(forced_iname_deps=insn_inames[insn.id]) + for insn in knl.instructions]) + +# }}} + + # {{{ kernel creation top-level def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): @@ -1302,6 +1315,20 @@ def make_kernel(domains, instructions, kernel_data=["..."], **kwargs): check_for_nonexistent_iname_deps(knl) knl = create_temporaries(knl, default_order) + # ------------------------------------------------------------------------- + # Ordering dependency: + # ------------------------------------------------------------------------- + # Must create temporaries before inferring inames (because those temporaries + # mediate dependencies that are then used for iname propagation.) + # ------------------------------------------------------------------------- + # NOTE: add_inferred_inames will be phased out and throws warnings if it + # does something. 
+ knl = add_inferred_inames(knl) + # ------------------------------------------------------------------------- + # Ordering dependency: + # ------------------------------------------------------------------------- + # Must infer inames before determining shapes. + # ------------------------------------------------------------------------- knl = determine_shapes_of_temporaries(knl) knl = expand_defines_in_shapes(knl, defines) knl = guess_arg_shape_if_requested(knl, default_order) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 3ba3f1918..44a5618e7 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -561,15 +561,14 @@ class InstructionBase(Record): "groups conflicts_with_groups " "no_sync_with " "predicates " - "forced_iname_deps_is_final stop_iname_dep_propagation " - "stop_iname_dep_propagation " + "forced_iname_deps_is_final forced_iname_deps " "priority boostable boostable_into".split()) def __init__(self, id, depends_on, depends_on_is_final, groups, conflicts_with_groups, no_sync_with, forced_iname_deps_is_final, forced_iname_deps, - stop_iname_dep_propagation, priority, + priority, boostable, boostable_into, predicates, tags, insn_deps=None, insn_deps_is_final=None): @@ -598,9 +597,6 @@ class InstructionBase(Record): if forced_iname_deps_is_final is None: forced_iname_deps_is_final = False - if stop_iname_dep_propagation is None: - stop_iname_dep_propagation = frozenset() - if depends_on_is_final is None: depends_on_is_final = False @@ -623,7 +619,6 @@ class InstructionBase(Record): # assert all(is_interned(pred) for pred in predicates) assert isinstance(forced_iname_deps, frozenset) - assert isinstance(stop_iname_dep_propagation, frozenset) assert isinstance(depends_on, frozenset) or depends_on is None assert isinstance(groups, frozenset) assert isinstance(conflicts_with_groups, frozenset) @@ -636,7 +631,6 @@ class InstructionBase(Record): groups=groups, conflicts_with_groups=conflicts_with_groups, 
forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, - stop_iname_dep_propagation=stop_iname_dep_propagation, priority=priority, boostable=boostable, boostable_into=boostable_into, @@ -808,8 +802,6 @@ class InstructionBase(Record): intern_frozenset_of_ids(self.forced_iname_deps)) self.predicates = ( intern_frozenset_of_ids(self.predicates)) - self.stop_iname_dep_propagation = ( - intern_frozenset_of_ids(self.stop_iname_dep_propagation)) # }}} @@ -1101,7 +1093,6 @@ class Assignment(MultiAssignmentBase): no_sync_with=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), - stop_iname_dep_propagation=None, boostable=None, boostable_into=None, tags=None, temp_var_type=None, atomicity=(), priority=0, predicates=frozenset(), @@ -1116,7 +1107,6 @@ class Assignment(MultiAssignmentBase): no_sync_with=no_sync_with, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, - stop_iname_dep_propagation=stop_iname_dep_propagation, boostable=boostable, boostable_into=boostable_into, priority=priority, @@ -1245,7 +1235,6 @@ class CallInstruction(MultiAssignmentBase): no_sync_with=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), - stop_iname_dep_propagation=None, boostable=None, boostable_into=None, tags=None, temp_var_types=None, priority=0, predicates=frozenset(), @@ -1260,7 +1249,6 @@ class CallInstruction(MultiAssignmentBase): no_sync_with=no_sync_with, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, - stop_iname_dep_propagation=stop_iname_dep_propagation, boostable=boostable, boostable_into=boostable_into, priority=priority, @@ -1421,7 +1409,6 @@ class CInstruction(InstructionBase): groups=None, conflicts_with_groups=None, no_sync_with=None, forced_iname_deps_is_final=None, forced_iname_deps=frozenset(), - stop_iname_dep_propagation=None, priority=0, boostable=None, boostable_into=None, predicates=frozenset(), tags=None, 
insn_deps=None, insn_deps_is_final=None): @@ -1443,7 +1430,6 @@ class CInstruction(InstructionBase): no_sync_with=no_sync_with, forced_iname_deps_is_final=forced_iname_deps_is_final, forced_iname_deps=forced_iname_deps, - stop_iname_dep_propagation=stop_iname_dep_propagation, boostable=boostable, boostable_into=boostable_into, priority=priority, predicates=predicates, tags=tags, diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 5a100d712..1775032cb 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -30,7 +30,7 @@ from six.moves import intern import numpy as np import islpy as isl from islpy import dim_type -from loopy.diagnostic import LoopyError +from loopy.diagnostic import LoopyError, warn_with_kernel import logging logger = logging.getLogger(__name__) @@ -164,9 +164,7 @@ def find_all_insn_inames(kernel): )) insn_id_to_inames[insn.id] = iname_deps - insn_assignee_inames[insn.id] = ( - write_deps & kernel.all_inames() - | insn.stop_iname_dep_propagation) + insn_assignee_inames[insn.id] = write_deps & kernel.all_inames() written_vars = kernel.get_written_variables() @@ -216,8 +214,14 @@ def find_all_insn_inames(kernel): if inames_new != inames_old: did_something = True - logger.debug("%s: find_all_insn_inames: %s -> %s (dep-based)" % ( - kernel.name, insn.id, ", ".join(sorted(inames_new)))) + + warn_with_kernel(kernel, "inferred_iname", + "The iname(s) '%s' on instruction '%s' was " + "automatically added. " + "This is deprecated. Please add the iname " + "to the instruction " + "implicitly, e.g. 
by adding '{inames=...}" + % (inames_new-inames_old, insn.id)) # }}} @@ -247,8 +251,14 @@ def find_all_insn_inames(kernel): if inames_new != inames_old: did_something = True insn_id_to_inames[insn.id] = frozenset(inames_new) - logger.debug("%s: find_all_insn_inames: %s -> %s (domain-based)" % ( - kernel.name, insn.id, ", ".join(sorted(inames_new)))) + + warn_with_kernel(kernel, "inferred_iname", + "The iname(s) '%s' on instruction '%s' was " + "automatically added. " + "This is deprecated. Please add the iname " + "to the instruction " + "implicitly, e.g. by adding '{inames=...}" + % (inames_new-inames_old, insn.id)) # }}} diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 5690218e6..d333cdf18 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -544,8 +544,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): expr.expr, expr.inames), depends_on=frozenset([init_insn.id]) | insn.depends_on, forced_iname_deps=update_insn_iname_deps, - forced_iname_deps_is_final=insn.forced_iname_deps_is_final, - stop_iname_dep_propagation=frozenset(expr.inames)) + forced_iname_deps_is_final=insn.forced_iname_deps_is_final) generated_insns.append(reduction_insn) @@ -716,8 +715,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on.add(prev_id) new_insn_add_no_sync_with.add(prev_id) new_insn_add_forced_iname_deps.add(stage_exec_iname or base_exec_iname) - new_insn_add_stop_iname_dep_propagation.add( - stage_exec_iname or base_exec_iname) if nresults == 1: assert len(acc_vars) == 1 @@ -828,7 +825,6 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): new_insn_add_depends_on = set() new_insn_add_no_sync_with = set() new_insn_add_forced_iname_deps = set() - new_insn_add_stop_iname_dep_propagation = set() generated_insns = [] @@ -859,11 +855,7 @@ def realize_reduction(kernel, insn_id_filter=None, unknown_types_ok=True): | frozenset(new_insn_add_no_sync_with), 
forced_iname_deps=( temp_kernel.insn_inames(insn) - | new_insn_add_forced_iname_deps), - stop_iname_dep_propagation=( - insn.stop_iname_dep_propagation - | new_insn_add_stop_iname_dep_propagation), - ) + | new_insn_add_forced_iname_deps)) kwargs.pop("id") kwargs.pop("expression") diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 967e14de6..97c15c6f2 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -143,9 +143,17 @@ def to_batched(knl, nbatches, batch_varying_args, batch_iname_prefix="ibatch", bvc = _BatchVariableChanger(rule_mapping_context, knl, batch_varying_args, batch_iname_expr, sequential=sequential) - return rule_mapping_context.finish_kernel( + kernel = rule_mapping_context.finish_kernel( bvc.map_kernel(knl)) + batch_iname_set = frozenset([batch_iname]) + kernel = kernel.copy( + instructions=[ + insn.copy(forced_iname_deps=insn.forced_iname_deps | batch_iname_set) + for insn in kernel.instructions]) + + return kernel + # }}} # vim: foldmethod=marker diff --git a/loopy/transform/buffer.py b/loopy/transform/buffer.py index fb32b3ce8..0c25b6393 100644 --- a/loopy/transform/buffer.py +++ b/loopy/transform/buffer.py @@ -400,7 +400,9 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, init_instruction = Assignment(id=init_insn_id, assignee=buf_var_init, expression=init_expression, - forced_iname_deps=frozenset(within_inames), + forced_iname_deps=( + frozenset(within_inames) + | frozenset(non1_init_inames)), depends_on=frozenset(), depends_on_is_final=True) @@ -475,9 +477,12 @@ def buffer_array(kernel, var_name, buffer_inames, init_expression=None, store_instruction = Assignment( id=kernel.make_unique_instruction_id(based_on="store_"+var_name), depends_on=frozenset(aar.modified_insn_ids), + no_sync_with=frozenset([init_insn_id]), assignee=store_target, expression=store_expression, - forced_iname_deps=frozenset(within_inames)) + forced_iname_deps=( + frozenset(within_inames) + | 
frozenset(non1_store_inames))) else: did_write = False diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index 1f75a60b8..a8c3b5849 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -310,7 +310,9 @@ class DifferentiationContext(object): id=new_insn_id, assignee=var(new_var_name)[ lhs_ind + diff_iname_exprs], - expression=diff_expr) + expression=diff_expr, + forced_iname_deps=( + orig_writer_insn.forced_iname_deps | frozenset(diff_inames))) self.new_instructions.append(insn) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 12749a303..4c3cd0a69 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1142,8 +1142,8 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): # {{{ change domains - new_inames_set = set(new_inames) - old_inames_set = set(old_inames) + new_inames_set = frozenset(new_inames) + old_inames_set = frozenset(old_inames) new_domains = [] for idom, dom in enumerate(kernel.domains): @@ -1229,7 +1229,26 @@ def affine_map_inames(kernel, old_inames, new_inames, equations): # }}} - return kernel.copy(domains=new_domains) + # {{{ switch iname refs in instructions + + def fix_iname_set(insn_id, inames): + if old_inames_set <= inames: + return (inames - old_inames_set) | new_inames_set + elif old_inames_set & inames: + raise LoopyError("instruction '%s' uses only a part (%s), not all, " + "of the old inames" + % (insn_id, ", ".join(old_inames_set & inames))) + else: + return inames + + new_instructions = [ + insn.copy(forced_iname_deps=fix_iname_set( + insn.id, insn.forced_iname_deps)) + for insn in kernel.instructions] + + # }}} + + return kernel.copy(domains=new_domains, instructions=new_instructions) # }}} diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index fd6f33efc..f9d71b9f1 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -480,8 +480,9 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, from 
loopy.symbolic import SubstitutionRuleExpander submap = SubstitutionRuleExpander(kernel.substitutions) - value_inames = get_dependencies( - submap(subst.expression) + value_inames = ( + get_dependencies(submap(subst.expression)) + - frozenset(subst.arguments) ) & kernel.all_inames() if value_inames - expanding_usage_arg_deps < extra_storage_axes: raise RuntimeError("unreferenced sweep inames specified: " @@ -728,8 +729,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, assignee = var(temporary_name) if non1_storage_axis_names: - assignee = assignee.index( - tuple(var(iname) for iname in non1_storage_axis_names)) + assignee = assignee[ + tuple(var(iname) for iname in non1_storage_axis_names)] # {{{ process substitutions on compute instruction @@ -764,7 +765,13 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, compute_insn = Assignment( id=compute_insn_id, assignee=assignee, - expression=compute_expression) + expression=compute_expression, + forced_iname_deps=( + frozenset(non1_storage_axis_names) + | frozenset( + (expanding_usage_arg_deps | value_inames) + - sweep_inames_set)) + ) # }}} diff --git a/loopy/version.py b/loopy/version.py index bb1b123cb..0fcb9c04d 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -32,4 +32,4 @@ except ImportError: else: _islpy_version = islpy.version.VERSION_TEXT -DATA_MODEL_VERSION = "v35-islpy%s" % _islpy_version +DATA_MODEL_VERSION = "v36-islpy%s" % _islpy_version -- GitLab