diff --git a/loopy/check.py b/loopy/check.py
index e9ff2425e2c4627dcf75591bdea4e112f1497643..3f7d99076745e545b1ecc4faa913b561dfd3e96c 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -8,8 +8,8 @@ from __future__ import division
 def check_for_unused_hw_axes_in_insns(kernel):
     group_size, local_size = kernel.get_grid_sizes_as_exprs()
 
-    group_axes = set(range(len(group_size)))
-    local_axes = set(range(len(local_size)))
+    group_axes = set(ax for ax, length in enumerate(group_size) if length != 1)
+    local_axes = set(ax for ax, length in enumerate(local_size) if length != 1)
 
     from loopy.kernel import LocalIndexTag, AutoLocalIndexTagBase, GroupIndexTag
     for insn in kernel.instructions:
diff --git a/loopy/codegen/bounds.py b/loopy/codegen/bounds.py
index bbd6ab8bcc7830763f6485db1af7316b5d61820f..88a64e7b270f4c1077f645710d5ba9a8111f4357 100644
--- a/loopy/codegen/bounds.py
+++ b/loopy/codegen/bounds.py
@@ -149,7 +149,6 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
 
     constraints = constraint_bset.get_constraints()
 
-    from pymbolic import expand
     from pymbolic.mapper.constant_folder import CommutativeConstantFoldingMapper
 
     cfm = CommutativeConstantFoldingMapper()
@@ -174,7 +173,7 @@ def wrap_in_for_from_constraints(ccm, iname, constraint_bset, stmt):
             from pymbolic import var
             rhs += iname_coeff*var(iname)
             end_conds.append("%s >= 0" %
-                    ccm(cfm(expand(rhs))))
+                    ccm(cfm(rhs)))
         else: #  iname_coeff > 0
             kind, bound = solve_constraint_for_bound(cns, iname)
             assert kind == ">="
diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index ade8d9fe5a79ed1bce3a70a398375b52b56695e0..3d81a5d6a29f181f2f1f0f43a4787c15bce14ed1 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -29,9 +29,6 @@ def get_simple_loop_bounds(kernel, sched_index, iname, implemented_domain):
 # {{{ conditional-minimizing slab decomposition
 
 def get_slab_decomposition(kernel, iname, sched_index, codegen_state):
-    space = kernel.space
-    tag = kernel.iname_to_tag.get(iname)
-
     lb_cns_orig, ub_cns_orig = get_simple_loop_bounds(kernel, sched_index, iname,
             codegen_state.implemented_domain)
 
@@ -39,7 +36,6 @@ def get_slab_decomposition(kernel, iname, sched_index, codegen_state):
 
     iname_tp, iname_idx = kernel.iname_to_dim[iname]
 
-    constraints = [lb_cns_orig]
     if lower_incr or upper_incr:
         bounds = kernel.get_iname_bounds(iname)
 
@@ -118,8 +114,6 @@ def get_slab_decomposition(kernel, iname, sched_index, codegen_state):
 # {{{ unrolled loops
 
 def generate_unroll_loop(kernel, sched_index, codegen_state):
-    ccm = codegen_state.c_code_mapper
-    space = kernel.space
     iname = kernel.schedule[sched_index].iname
     tag = kernel.iname_to_tag.get(iname)
 
@@ -167,7 +161,9 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=
 
     global_size, local_size = kernel.get_grid_sizes()
 
+    hw_inames_left = hw_inames_left[:]
     iname = hw_inames_left.pop()
+
     tag = kernel.iname_to_tag.get(iname)
 
     assert isinstance(tag, UniqueTag)
@@ -205,8 +201,6 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=
         raise RuntimeError("cannot do slab decomposition on inames that share "
                 "a tag with other inames")
 
-    ccm = codegen_state.c_code_mapper
-
     result = []
 
     from loopy.codegen import add_comment
@@ -230,9 +224,7 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=
 
 def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
     ccm = codegen_state.c_code_mapper
-    space = kernel.space
     iname = kernel.schedule[sched_index].iname
-    tag = kernel.iname_to_tag.get(iname)
 
     slabs = get_slab_decomposition(
             kernel, iname, sched_index, codegen_state)
diff --git a/loopy/compiled.py b/loopy/compiled.py
index 0c6ec7580ee3a29de72aab6c307e7404f3970311..9b976aa186600ac6f511e72d7fcc58342566ea69 100644
--- a/loopy/compiled.py
+++ b/loopy/compiled.py
@@ -1,4 +1,5 @@
 from __future__ import division
+import pyopencl as cl
 
 
 
diff --git a/loopy/kernel.py b/loopy/kernel.py
index ad27038e65aa050e3266644806b89658535f0225..04bce3bad9da15dd14c20d5e63c3276c86750fbc 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -772,6 +772,8 @@ class LoopKernel(Record):
             size_list = []
             sorted_axes = sorted(size_dict.iterkeys())
 
+            zero_aff = isl.Aff.zero_on_domain(self.space.params())
+
             while sorted_axes or forced_sizes:
                 if sorted_axes:
                     cur_axis = sorted_axes.pop(0)
@@ -781,8 +783,7 @@ class LoopKernel(Record):
                 if len(size_list) in forced_sizes:
                     size_list.append(
                             isl.PwAff.from_aff(
-                                isl.Aff.zero_on_domain(self.space.params())
-                                + forced_sizes.pop(len(size_list))))
+                                zero_aff + forced_sizes.pop(len(size_list))))
                     continue
 
                 assert cur_axis is not None
@@ -792,7 +793,7 @@ class LoopKernel(Record):
                     from warnings import warn
                     warn("%s axis %d unassigned--assuming length 1" % (
                         which, len(size_list)), LoopyAdvisory)
-                    size_list.append(1)
+                    size_list.append(zero_aff + 1)
 
                 size_list.append(size_dict[cur_axis])
 
diff --git a/loopy/symbolic.py b/loopy/symbolic.py
index b25dd296528c9ca8d816e69376cf27e36a8ae8c5..8f28afb33c1e45f0cbec609ed907567d2ee745fd 100644
--- a/loopy/symbolic.py
+++ b/loopy/symbolic.py
@@ -441,8 +441,6 @@ def pw_aff_to_expr(pw_aff):
     return aff_to_expr(aff)
 
 def aff_from_expr(space, expr):
-    n = space.dim(dim_type.set)
-
     zero = isl.Aff.zero_on_domain(isl.LocalSpace.from_space(space))
     context = {}
     for name, (dt, pos) in space.get_var_dict().iteritems():
@@ -452,7 +450,7 @@ def aff_from_expr(space, expr):
         context[name] = zero.set_coefficient(dt, pos, 1)
 
     from pymbolic import evaluate
-    return evaluate(expr, context)
+    return zero + evaluate(expr, context)
 
 # }}}