diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 8ad4a08caf226e14a3e524d6b3e29b2f0819ca5a..57255c179ea484c68b9c2ad9ef9be81607bff630 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -146,13 +146,12 @@ def generate_unroll_loop(kernel, sched_index, codegen_state):
 
 
 def intersect_kernel_with_slab(kernel, slab, iname):
-    hdi = kernel.get_home_domain_index(iname)
-    home_domain = kernel.domains[hdi]
-    new_domains = kernel.domains[:]
-    new_domains[hdi] = home_domain & isl.align_spaces(slab, home_domain)
+    from loopy.kernel.tools import DomainChanger
 
-    return kernel.copy(domains=new_domains,
-            get_grid_sizes=kernel.get_grid_sizes)
+    domch = DomainChanger(kernel, (iname,))  # keyed on this single iname
+    orig_domain = domch.get_original_domain()
+    orig_domain, slab = isl.align_two(orig_domain, slab)  # same space for '&'
+    return domch.get_kernel_with(orig_domain & slab)  # domain narrowed to slab
 
 
 # {{{ hw-parallel loop
@@ -302,35 +301,30 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
                         dt, idx, 1)
 
         _, loop_iname_idx = dom_and_slab.get_var_dict()[loop_iname]
-        lbound = kernel.cache_manager.dim_min(
-                dom_and_slab, loop_iname_idx).coalesce()
-        ubound = kernel.cache_manager.dim_max(
-                dom_and_slab, loop_iname_idx).coalesce()
 
         from loopy.isl_helpers import (
                 static_min_of_pw_aff,
                 static_max_of_pw_aff)
 
-        lbound = static_min_of_pw_aff(lbound,
+        static_lbound = static_min_of_pw_aff(
+                kernel.cache_manager.dim_min(
+                    dom_and_slab, loop_iname_idx).coalesce(),
                 constants_only=False)
-        ubound = static_max_of_pw_aff(ubound,
+        static_ubound = static_max_of_pw_aff(
+                kernel.cache_manager.dim_max(
+                    dom_and_slab, loop_iname_idx).coalesce(),
                 constants_only=False)
 
         # }}}
 
         # {{{ find implemented slab, build inner code
 
-        from loopy.isl_helpers import iname_rel_aff
-        impl_slab = (
-                isl.BasicSet.universe(dom_and_slab.space)
-                .add_constraint(
-                    isl.Constraint.inequality_from_aff(
-                        iname_rel_aff(dom_and_slab.space,
-                            loop_iname, ">=", lbound)))
-                .add_constraint(
-                    isl.Constraint.inequality_from_aff(
-                        iname_rel_aff(dom_and_slab.space,
-                            loop_iname, "<=", ubound))))
+        from loopy.isl_helpers import make_slab_from_bound_pwaffs
+
+        # impl_slab may be overapproximated
+        impl_slab = make_slab_from_bound_pwaffs(
+                dom_and_slab.space,
+                loop_iname, static_lbound, static_ubound)
 
         for iname in moved_inames:
             dt, idx = impl_slab.get_var_dict()[iname]
@@ -340,8 +334,10 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
 
         new_codegen_state = codegen_state.intersect(impl_slab)
 
-        inner = build_loop_nest(kernel, sched_index+1,
-                new_codegen_state)
+        inner = build_loop_nest(
+                intersect_kernel_with_slab(
+                    kernel, slab, iname),
+                sched_index+1, new_codegen_state)
 
         # }}}
 
@@ -352,20 +348,24 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
         from cgen import Initializer, POD, Const, Line, For
         from loopy.symbolic import aff_to_expr
 
-        if (ubound - lbound).plain_is_zero():
+        if (static_ubound - static_lbound).plain_is_zero():
             # single-trip, generate just a variable assignment, not a loop
             result.append(gen_code_block([
                 Initializer(Const(POD(kernel.index_dtype, loop_iname)),
-                    ccm(aff_to_expr(lbound), "i")),
+                    ccm(aff_to_expr(static_lbound), "i")),
                 Line(),
                 inner,
                 ]))
 
         else:
             from loopy.codegen import wrap_in
+            from pyopencl.tools import dtype_to_ctype
+
             result.append(wrap_in(For,
-                    "int %s = %s" % (loop_iname, ccm(aff_to_expr(lbound), "i")),
-                    "%s <= %s" % (loop_iname, ccm(aff_to_expr(ubound), "i")),
+                    "%s %s = %s"
+                    % (dtype_to_ctype(kernel.index_dtype),
+                        loop_iname, ccm(aff_to_expr(static_lbound), "i")),
+                    "%s <= %s" % (loop_iname, ccm(aff_to_expr(static_ubound), "i")),
                     "++%s" % loop_iname,
                     inner))
 
diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py
index 587b1be383c0dc580f4e2d9952aa5c695fec8b0a..201aee16ce3571070aaeeabdce4e6c8b726df7cd 100644
--- a/loopy/isl_helpers.py
+++ b/loopy/isl_helpers.py
@@ -152,6 +152,21 @@ def make_slab(space, iname, start, stop):
     return result
 
 
+def make_slab_from_bound_pwaffs(space, iname, lbound, ubound):
+    dt, pos = space.get_var_dict()[iname]
+    iname_pwaff = isl.PwAff.var_on_domain(space, dt, pos)
+    # Align the iname's PwAff with each bound; the asserts check the spaces.
+    iname_pwaff, lbound = isl.align_two(iname_pwaff, lbound)
+    iname_pwaff, ubound = isl.align_two(iname_pwaff, ubound)
+    assert iname_pwaff.space == lbound.space
+    assert iname_pwaff.space == ubound.space
+    # Slab: points with lbound <= iname <= ubound, run through convexify().
+    return convexify(
+            iname_pwaff.ge_set(lbound)
+            &
+            iname_pwaff.le_set(ubound))
+
+
 def iname_rel_aff(space, iname, rel, aff):
     """*aff*'s domain space is allowed to not match *space*."""
 
@@ -198,7 +213,10 @@ def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context)
     if context is not None:
         reference = reference.intersect(context)
 
+    # {{{ find bounds that are also global bounds
+
     for set, candidate_aff in pieces:
+        # gist can be time-consuming, try without first
         for use_gist in [False, True]:
             if use_gist:
                 candidate_aff = candidate_aff.gist(set)
@@ -209,6 +227,8 @@ def static_extremum_of_pw_aff(pw_aff, constants_only, set_method, what, context)
             if reference <= set_method(pw_aff, candidate_aff):
                 return candidate_aff
 
+    # }}}
+
     raise ValueError("a static %s was not found for PwAff '%s'"
             % (what, pw_aff))
 
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 80ff20c5145c3cd7edf9c4117520118820562044..7957fc9cad3e55d295956df5efbe0ee1159ce319 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -1624,7 +1624,7 @@ def test_slab_decomposition_does_not_double_execute(ctx_factory):
 
     ref_knl = knl
 
-    for outer_tag in ["for", "unr", "l.0"]:
+    for outer_tag in ["for", "g.0"]:
         knl = ref_knl
         knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr",
                 outer_tag=outer_tag)
@@ -1639,7 +1639,6 @@ def test_slab_decomposition_does_not_double_execute(ctx_factory):
         evt, _ = knl(queue, a=a_knl)
 
         assert (a_ref == a_knl).get().all()
-        1/0
 
 
 def test_multiple_writes_to_local_temporary(ctx_factory):