diff --git a/loopy/check.py b/loopy/check.py index 3f7d99076745e545b1ecc4faa913b561dfd3e96c..9903ec70e83edb4bc4bef6bba261c6d119a29be6 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -13,6 +13,9 @@ def check_for_unused_hw_axes_in_insns(kernel): from loopy.kernel import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag for insn in kernel.instructions: + if insn.boostable: + continue + group_axes_used = set() local_axes_used = set() @@ -27,11 +30,17 @@ def check_for_unused_hw_axes_in_insns(kernel): raise RuntimeError("auto local tag encountered") if group_axes != group_axes_used: - raise RuntimeError("instruction '%s' does not use all group hw axes" - % insn.id) + raise RuntimeError("instruction '%s' does not use all group hw axes " + "(available: %s used:%s)" + % (insn.id, + ",".join(str(i) for i in group_axes), + ",".join(str(i) for i in group_axes_used))) if local_axes != local_axes_used: raise RuntimeError("instruction '%s' does not use all local hw axes" - % insn.id) + " (available: %s used:%s)" + % (insn.id, + ",".join(str(i) for i in local_axes), + ",".join(str(i) for i in local_axes_used))) diff --git a/loopy/cse.py b/loopy/cse.py index 6a1ab747e8556eeab38e10cc1e6e45ca8b5c4647..3456476439be06132853ffd605b24723ac73d1a7 100644 --- a/loopy/cse.py +++ b/loopy/cse.py @@ -11,7 +11,7 @@ from pymbolic import var -def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, +def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies, target_var_is_local, cse): from loopy.kernel import (LocalIndexTagBase, GroupIndexTag, IlpTag) @@ -49,7 +49,7 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, raise RuntimeError("invalid: hardware-parallelized " "fetch into private variable") - return False + return # the iname is *not* a dependency of the fetch expression if iname in duplicate_inames: @@ -57,23 +57,6 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies, "that the CSE ('%s') does not depend on 
" "does not make sense" % (iname, cse.child)) - # Which iname dependencies are carried over from CSE host - # to the CSE compute instruction? - - if not target_var_is_local: - # If we're writing to a private variable, then each - # hardware-parallel iname must execute its own copy of - # the CSE compute instruction. After all, each work item - # has its own set of private variables. - - return kind in "gl" - else: - # If we're writing to a local variable, then all other local - # dimensions see our updates, and thus they do *not* need to - # execute their own copy of this instruction. - - return kind == "g" - @@ -315,10 +298,9 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local, else: tag = kernel.iname_to_tag.get(iname) - if should_cse_force_iname_dep( + check_cse_iname_deps( iname, independent_inames, tag, dependencies, - target_var_is_local, lead_csed.cse): - forced_iname_deps.add(iname) + target_var_is_local, lead_csed.cse) # }}}