diff --git a/loopy/check.py b/loopy/check.py
index 3f7d99076745e545b1ecc4faa913b561dfd3e96c..9903ec70e83edb4bc4bef6bba261c6d119a29be6 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -13,6 +13,9 @@ def check_for_unused_hw_axes_in_insns(kernel):
 
     from loopy.kernel import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag
     for insn in kernel.instructions:
+        if insn.boostable:
+            continue
+
         group_axes_used = set()
         local_axes_used = set()
 
@@ -27,11 +30,17 @@ def check_for_unused_hw_axes_in_insns(kernel):
                 raise RuntimeError("auto local tag encountered")
 
         if group_axes != group_axes_used:
-            raise RuntimeError("instruction '%s' does not use all group hw axes"
-                    % insn.id)
+            raise RuntimeError("instruction '%s' does not use all group hw axes "
+                    "(available: %s used:%s)"
+                    % (insn.id,
+                        ",".join(str(i) for i in group_axes),
+                        ",".join(str(i) for i in group_axes_used)))
         if local_axes != local_axes_used:
             raise RuntimeError("instruction '%s' does not use all local hw axes"
-                    % insn.id)
+                    " (available: %s used:%s)"
+                    % (insn.id,
+                        ",".join(str(i) for i in local_axes),
+                        ",".join(str(i) for i in local_axes_used)))
 
 
 
diff --git a/loopy/cse.py b/loopy/cse.py
index 6a1ab747e8556eeab38e10cc1e6e45ca8b5c4647..3456476439be06132853ffd605b24723ac73d1a7 100644
--- a/loopy/cse.py
+++ b/loopy/cse.py
@@ -11,7 +11,7 @@ from pymbolic import var
 
 
 
-def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies,
+def check_cse_iname_deps(iname, duplicate_inames, tag, dependencies,
         target_var_is_local, cse):
     from loopy.kernel import (LocalIndexTagBase, GroupIndexTag, IlpTag)
 
@@ -49,7 +49,7 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies,
             raise RuntimeError("invalid: hardware-parallelized "
                     "fetch into private variable")
 
-        return False
+        return
 
     # the iname is *not* a dependency of the fetch expression
     if iname in duplicate_inames:
@@ -57,23 +57,6 @@ def should_cse_force_iname_dep(iname, duplicate_inames, tag, dependencies,
                 "that the CSE ('%s') does not depend on "
                 "does not make sense" % (iname, cse.child))
 
-    # Which iname dependencies are carried over from CSE host
-    # to the CSE compute instruction?
-
-    if not target_var_is_local:
-        # If we're writing to a private variable, then each
-        # hardware-parallel iname must execute its own copy of
-        # the CSE compute instruction. After all, each work item
-        # has its own set of private variables.
-
-        return kind in "gl"
-    else:
-        # If we're writing to a local variable, then all other local
-        # dimensions see our updates, and thus they do *not* need to
-        # execute their own copy of this instruction.
-
-        return kind == "g"
-
 
 
 
@@ -315,10 +298,9 @@ def make_compute_insn(kernel, lead_csed, target_var_name, target_var_is_local,
         else:
             tag = kernel.iname_to_tag.get(iname)
 
-        if should_cse_force_iname_dep(
+        check_cse_iname_deps(
                 iname, independent_inames, tag, dependencies,
-                target_var_is_local, lead_csed.cse):
-            forced_iname_deps.add(iname)
+                target_var_is_local, lead_csed.cse)
 
     # }}}