From 3806860a8e212731daba067b210b04983f29a64e Mon Sep 17 00:00:00 2001 From: Tim Warburton <timwar@caam.rice.edu> Date: Tue, 1 Nov 2011 23:30:06 -0500 Subject: [PATCH] Make it ok for boostable instructions to not depend on all hw axes. If we didn't do this, then the CSE generator would have to anticipate that the CSE compute instruction would need to depend on more inames (those unused hw axes) than actually declared--because previously innocuous-seeming inames could suddenly turn out to be hardware axes. --- loopy/check.py | 15 ++++++++++++--- loopy/cse.py | 26 ++++---------------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/loopy/check.py b/loopy/check.py index 3f7d99076..9903ec70e 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -13,6 +13,9 @@ def check_for_unused_hw_axes_in_insns(kernel): from loopy.kernel import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag for insn in kernel.instructions: + if insn.boostable: + continue + group_axes_used = set() local_axes_used = set() @@ -27,11 +30,17 @@ def check_for_unused_hw_axes_in_insns(kernel): raise RuntimeError("auto local tag encountered") if group_axes != group_axes_used: - raise RuntimeError("instruction '%s' does not use all group hw axes" - % insn.id) + raise RuntimeError("instruction '%s' does not use all group hw axes " + "(available: %s used:%s)" + % (insn.id, + ",".join(str(i) for i in group_axes), + ",".join(str(i) for i in group_axes_used))) if local_axes != local_axes_used: raise RuntimeError("instruction '%s' does not use all local hw axes" - % insn.id) + "(available: %s used:%s)" + % (insn.id, + ",".join(str(i) for i in local_axes), + ",".join(str(i) for i in local_axes_used))) diff --git a/loopy/cse.py b/loopy/cse.py index 6a1ab747e..345647643 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -11,7 +11,7 @@ from pymbolic import var -def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, +def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, target_var_is_local, cse): from loopy.kernel import (LocalIndexTagBase, GroupIndexTag, IlpTag) @@ -49,7 +49,7 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, raise RuntimeError("invalid: hardware-parallelized " "fetch into private variable") - return False + return # the iname is *not* a dependency of the fetch expression if iname in duplicate_inames: @@ -57,23 +57,6 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, "that the CSE ('%s') does not depend on " "does not make sense" % (iname, cse.child)) - # Which iname dependencies are carried over from CSE host - # to the CSE compute instruction? - - if not target_var_is_local: - # If we're writing to a private variable, then each - # hardware-parallel iname must execute its own copy of - # the CSE compute instruction. After all, each work item - # has its own set of private variables. - - return kind in "gl" - else: - # If we're writing to a local variable, then all other local - # dimensions see our updates, and thus they do *not* need to - # execute their own copy of this instruction. - - return kind == "g" - @@ -315,10 +298,9 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local, else: tag = kernel.iname_to_tag.get(iname) - if should_cse_force_iname_dep( + check_cse_iname_deps( iname, independent_inames, tag, dependencies, - target_var_is_local, lead_csed.cse): - forced_iname_deps.add(iname) + target_var_is_local, lead_csed.cse) # }}} -- GitLab