From 5a2bf4ad1347c776c779bbee733e564ec06c0c02 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 4 Nov 2016 00:19:49 -0500 Subject: [PATCH 1/3] Fix printing of dependencies of individual instructions --- loopy/kernel/instruction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 921d2538a..c54d1fc32 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -364,7 +364,7 @@ class InstructionBase(Record): raise RuntimeError("unexpected value for Instruction.boostable") if self.depends_on: - result.append("deps="+":".join(self.depends_on)) + result.append("dep="+":".join(self.depends_on)) if self.no_sync_with: result.append("nosync="+":".join(self.no_sync_with)) if self.groups: -- GitLab From ccb588989e2e6a1e55cb42b8dc79742a78d211e6 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 4 Nov 2016 00:20:25 -0500 Subject: [PATCH 2/3] Find dependencies of precompute instructions --- loopy/transform/precompute.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 27fad67f8..2b01de3b6 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -136,7 +136,7 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): access_descriptors, array_base_map, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id): + temporary_name, compute_insn_id, compute_read_variables): super(RuleInvocationReplacer, self).__init__(rule_mapping_context) self.subst_name = subst_name @@ -153,6 +153,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): self.temporary_name = temporary_name self.compute_insn_id = compute_insn_id + self.compute_read_variables = compute_read_variables + self.compute_insn_deps = set() + def map_substitution(self, name, tag, arguments, expn_state): if not ( name == self.subst_name @@ -230,6 +233,9 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): insn.depends_on | frozenset([self.compute_insn_id]))) + self.compute_insn_deps.update( + insn.depends_on - set([self.compute_insn_id])) + new_insns.append(insn) return kernel.copy(instructions=new_insns) @@ -787,7 +793,8 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, access_descriptors, abm, storage_axis_names, storage_axis_sources, non1_storage_axis_names, - temporary_name, compute_insn_id) + temporary_name, compute_insn_id, + compute_read_variables=get_dependencies(compute_expression)) kernel = invr.map_kernel(kernel) kernel = kernel.copy( @@ -796,6 +803,17 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # }}} + # {{{ add dependencies to compute insn + + kernel = kernel.copy( + instructions=[ + insn.copy(depends_on=frozenset(invr.compute_insn_deps)) + if insn.id == compute_insn_id + else insn + for insn in kernel.instructions]) + + # }}} + # {{{ determine inames for compute insn if precompute_outer_inames is None: -- GitLab From 510304d0b5dcb0477b738698df6c59862ad5a681 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 4 Nov 2016 00:59:08 -0500 Subject: [PATCH 3/3] Determine dependencies of precompute instructions using referred-to variables --- loopy/transform/precompute.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 2b01de3b6..db993b771 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -233,8 +233,15 @@ class RuleInvocationReplacer(RuleAwareIdentityMapper): insn.depends_on | frozenset([self.compute_insn_id]))) - self.compute_insn_deps.update( - insn.depends_on - set([self.compute_insn_id])) + for dep in insn.depends_on: + if dep == self.compute_insn_id: + continue + + dep_insn = kernel.id_to_insn[dep] + if (frozenset(dep_insn.assignee_var_names()) + & self.compute_read_variables): + self.compute_insn_deps.update( + insn.depends_on - set([self.compute_insn_id])) new_insns.append(insn) @@ -788,13 +795,16 @@ def precompute(kernel, subst_use, sweep_inames=[], within=None, # {{{ substitute rule into expressions in kernel (if within footprint) + from loopy.symbolic import SubstitutionRuleExpander + expander = SubstitutionRuleExpander(kernel.substitutions) + invr = RuleInvocationReplacer(rule_mapping_context, subst_name, subst_tag, within, access_descriptors, abm, storage_axis_names, storage_axis_sources, non1_storage_axis_names, temporary_name, compute_insn_id, - compute_read_variables=get_dependencies(compute_expression)) + compute_read_variables=get_dependencies(expander(compute_expression))) kernel = invr.map_kernel(kernel) kernel = kernel.copy( -- GitLab