diff --git a/bin/loopy b/bin/loopy
index 0ac8ee78a5da228bb7628cd0c8866bf5f29abeef..57dac8038ca638269249783151e4b195b05db223 100644
--- a/bin/loopy
+++ b/bin/loopy
@@ -197,7 +197,10 @@ def main():
     import os
     edit_kernel_env = os.environ.get("LOOPY_EDIT_KERNEL")
     if (args.edit_code
-            or any(edit_kernel_env.lower() in k.name.lower() for k in kernels)):
+            or (
+                edit_kernel_env is not None
+                and
+                any(edit_kernel_env.lower() in k.name.lower() for k in kernels))):
         from pytools import invoke_editor
         code = invoke_editor(code, filename="edit.cl")
 
diff --git a/doc/tutorial.rst b/doc/tutorial.rst
index 0df707d0db82fcf42b60e8405d1f0d189fb31354..25cb764f3cd38c2b10b59edb71d850e28d6da2d6 100644
--- a/doc/tutorial.rst
+++ b/doc/tutorial.rst
@@ -997,9 +997,10 @@ transformation exists in :func:`loopy.add_prefetch`:
     >>> evt, (out,) = knl_pf(queue, a=x_vec_dev)
     #define lid(N) ((int) get_local_id(N))
     ...
-        a_fetch_0 = a[16 * gid(0) + lid(0)];
+        a_fetch = a[16 * gid(0) + lid(0)];
+        acc_k = 0.0f;
         for (int k = 0; k <= 15; ++k)
-          acc_k = acc_k + a_fetch_0;
+          acc_k = acc_k + a_fetch;
         out[16 * gid(0) + lid(0)] = acc_k;
     ...
 
@@ -1021,12 +1022,14 @@ earlier:
     #define lid(N) ((int) get_local_id(N))
     ...
       if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
-        a_fetch_0[lid(0)] = a[lid(0) + 16 * gid(0)];
-      barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch_0 (insn_k_update depends on a_fetch) */;
+        a_fetch[lid(0)] = a[lid(0) + 16 * gid(0)];
+      if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
+        acc_k = 0.0f;
+      barrier(CLK_LOCAL_MEM_FENCE) /* for a_fetch (insn_k_update depends on a_fetch_rule) */;
       if (-1 + -16 * gid(0) + -1 * lid(0) + n >= 0)
       {
         for (int k = 0; k <= 15; ++k)
-          acc_k = acc_k + a_fetch_0[lid(0)];
+          acc_k = acc_k + a_fetch[lid(0)];
         out[16 * gid(0) + lid(0)] = acc_k;
       }
     ...
@@ -1151,7 +1154,7 @@ sign that something is amiss:
     >>> evt, (out,) = knl(queue, a=a_mat_dev)
     Traceback (most recent call last):
     ...
-    WriteRaceConditionWarning: instruction 'a_fetch' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch_0' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch)' to silenced_warnings kernel argument to disable)
+    WriteRaceConditionWarning: instruction 'a_fetch_rule' looks invalid: it assigns to indices based on local IDs, but its temporary 'a_fetch' cannot be made local because a write race across the iname(s) 'j_inner' would emerge. (Do you need to add an extra iname to your prefetch?) (add 'write_race_local(a_fetch_rule)' to silenced_warnings kernel argument to disable)
 
 When we ask to see the code, the issue becomes apparent:
 
@@ -1167,17 +1170,17 @@ When we ask to see the code, the issue becomes apparent:
     <BLANKLINE>
     __kernel void __attribute__ ((reqd_work_group_size(16, 16, 1))) transpose(__global float const *restrict a, int const n, __global float *restrict out)
     {
-      float a_fetch_0[16];
+      float a_fetch[16];
     <BLANKLINE>
       ...
-          a_fetch_0[lid(0)] = a[n * (lid(0) + 16 * gid(1)) + lid(1) + 16 * gid(0)];
+          a_fetch[lid(0)] = a[n * (lid(0) + 16 * gid(1)) + lid(1) + 16 * gid(0)];
       ...
-          out[n * (lid(1) + gid(0) * 16) + lid(0) + gid(1) * 16] = a_fetch_0[lid(0)];
+          out[n * (lid(1) + gid(0) * 16) + lid(0) + gid(1) * 16] = a_fetch[lid(0)];
       ...
     }
 
 Loopy has a 2D workgroup to use for prefetching of a 1D array. When it
-considers making *a_fetch_0* ``local`` (in the OpenCL memory sense of the word)
+considers making *a_fetch* ``local`` (in the OpenCL memory sense of the word)
 to make use of parallelism in prefetching, it discovers that a write race
 across the remaining axis of the workgroup would emerge.
 
diff --git a/loopy/__init__.py b/loopy/__init__.py
index d417648b7272aeed5b4a594604ea5eb78024be55..ff3a004d98ec4e2f4894c719264c9d05b617fa90 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -944,6 +944,18 @@ def _add_kernel_axis(kernel, axis_name, start, stop, base_inames):
             .insert_dims(dim_type.set, new_dim_idx, 1)
             .set_dim_name(dim_type.set, new_dim_idx, axis_name))
 
+    from loopy.symbolic import get_dependencies
+    deps = get_dependencies(start) | get_dependencies(stop)
+    assert deps <= kernel.all_params()
+
+    param_names = domain.get_var_names(dim_type.param)
+    for dep in deps:
+        if dep not in param_names:
+            new_dim_idx = domain.dim(dim_type.param)
+            domain = (domain
+                    .insert_dims(dim_type.param, new_dim_idx, 1)
+                    .set_dim_name(dim_type.param, new_dim_idx, dep))
+
     from loopy.isl_helpers import make_slab
     slab = make_slab(domain.get_space(), axis_name, start, stop)
 
@@ -1023,7 +1035,9 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames,
 
 
 def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
-        default_tag="l.auto", rule_name=None, footprint_subscripts=None,
+        default_tag="l.auto", rule_name=None,
+        temporary_name=None, temporary_is_local=None,
+        footprint_subscripts=None,
         fetch_bounding_box=False):
     """Prefetch all accesses to the variable *var_name*, with all accesses
     being swept through *sweep_inames*.
@@ -1082,7 +1096,9 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
     var_name_gen = kernel.get_var_name_generator()
 
     if rule_name is None:
-        rule_name = var_name_gen("%s_fetch" % c_name)
+        rule_name = var_name_gen("%s_fetch_rule" % c_name)
+    if temporary_name is None:
+        temporary_name = var_name_gen("%s_fetch" % c_name)
 
     arg = kernel.arg_dict[var_name]
 
@@ -1119,7 +1135,9 @@ def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
     new_kernel = precompute(kernel, subst_use, sweep_inames,
             precompute_inames=dim_arg_names,
             default_tag=default_tag, dtype=arg.dtype,
-            fetch_bounding_box=fetch_bounding_box)
+            fetch_bounding_box=fetch_bounding_box,
+            temporary_name=temporary_name,
+            temporary_is_local=temporary_is_local)
 
     # {{{ remove inames that were temporarily added by slice sweeps
 
diff --git a/loopy/check.py b/loopy/check.py
index 01a6e52c268a5c4e76f94653490d05c345e1a7d0..2673e1bdf9e3557a95b3504aac7f184281403214 100644
--- a/loopy/check.py
+++ b/loopy/check.py
@@ -1,7 +1,4 @@
-from __future__ import division
-from __future__ import absolute_import
-import six
-from six.moves import range
+from __future__ import absolute_import, division, print_function
 
 __copyright__ = "Copyright (C) 2012 Andreas Kloeckner"
 
@@ -25,12 +22,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 """
 
+import six
+from six.moves import range
 
 from islpy import dim_type
 import islpy as isl
 from loopy.symbolic import WalkMapper
 from loopy.diagnostic import LoopyError, WriteRaceConditionWarning, warn
-from loopy.tools import is_integer
 
 import logging
 logger = logging.getLogger(__name__)
@@ -38,16 +36,6 @@ logger = logging.getLogger(__name__)
 
 # {{{ sanity checks run pre-scheduling
 
-def check_temp_variable_shapes_are_constant(kernel):
-    for tv in six.itervalues(kernel.temporary_variables):
-        if any(not is_integer(s_i) for s_i in tv.shape):
-            raise LoopyError("shape of temporary variable '%s' is not "
-                    "constant (but has to be since the size of "
-                    "the temporary needs to be known at build time). "
-                    "Use loopy.fix_parameters to set variables to "
-                    "constant values." % tv.name)
-
-
 def check_insn_attributes(kernel):
     all_insn_ids = set(insn.id for insn in kernel.instructions)
 
@@ -360,7 +348,6 @@ def pre_schedule_checks(kernel):
     try:
         logger.info("pre-schedule check %s: start" % kernel.name)
 
-        check_temp_variable_shapes_are_constant(kernel)
         check_for_orphaned_user_hardware_axes(kernel)
         check_for_double_use_of_hw_axes(kernel)
         check_insn_attributes(kernel)
@@ -484,10 +471,14 @@ def check_implemented_domains(kernel, implemented_domains, code=None):
                 .project_out_except(insn_inames, [dim_type.set]))
 
         insn_domain = kernel.get_inames_domain(insn_inames)
+        insn_parameters = frozenset(insn_domain.get_var_names(dim_type.param))
         assumptions, insn_domain = align_two(assumption_non_param, insn_domain)
         desired_domain = ((insn_domain & assumptions)
-            .project_out_except(insn_inames, [dim_type.set]))
+            .project_out_except(insn_inames, [dim_type.set])
+            .project_out_except(insn_parameters, [dim_type.param]))
 
+        insn_impl_domain = (insn_impl_domain
+                .project_out_except(insn_parameters, [dim_type.param]))
         insn_impl_domain, desired_domain = align_two(
                 insn_impl_domain, desired_domain)
 
diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py
index c5c0baec6d07fc3daf3adfa5a8e130d712575963..d2d178bc459cb0c231a78a5a1b2c3b8092d07536 100644
--- a/loopy/kernel/__init__.py
+++ b/loopy/kernel/__init__.py
@@ -1074,7 +1074,7 @@ class LoopKernel(RecordWithoutPickling):
         if kernel.schedule is not None:
             lines.append("SCHEDULE:")
             from loopy.schedule import dump_schedule
-            lines.append(dump_schedule(kernel.schedule))
+            lines.append(dump_schedule(kernel, kernel.schedule))
             lines.append(sep)
 
         return "\n".join(lines)
diff --git a/loopy/schedule.py b/loopy/schedule.py
index 1d0dc1221a8280d73cdd03858bb72404c7c79afb..bba42381a5a4f67e957c137a2c866dd816c523c4 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -171,7 +171,33 @@ def find_used_inames_within(kernel, sched_index):
     return result
 
 
-def find_loop_nest_map(kernel):
+def find_loop_nest_with_map(kernel):
+    """Returns a dictionary mapping inames to other inames that are
+    always nested with them.
+    """
+    result = {}
+
+    from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag
+
+    all_nonpar_inames = set([
+            iname
+            for iname in kernel.all_inames()
+            if not isinstance(kernel.iname_to_tag.get(iname),
+                (ParallelTag, IlpBaseTag, VectorizeTag))])
+
+    iname_to_insns = kernel.iname_to_insns()
+
+    for iname in all_nonpar_inames:
+        result[iname] = set([
+            other_iname
+            for insn in iname_to_insns[iname]
+            for other_iname in kernel.insn_inames(insn) & all_nonpar_inames
+            ])
+
+    return result
+
+
+def find_loop_nest_around_map(kernel):
     """Returns a dictionary mapping inames to other inames that are
     always nested around them.
     """
@@ -212,20 +238,26 @@ def find_loop_nest_map(kernel):
     return result
 
 
-def find_loop_insn_dep_map(kernel, loop_nest_map):
+def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map):
     """Returns a dictionary mapping inames to other instruction ids that need to
     be scheduled before the iname should be eligible for scheduling.
     """
 
     result = {}
 
-    from loopy.kernel.data import ParallelTag
+    from loopy.kernel.data import ParallelTag, IlpBaseTag, VectorizeTag
     for insn in kernel.instructions:
         for iname in kernel.insn_inames(insn):
             if isinstance(kernel.iname_to_tag.get(iname), ParallelTag):
                 continue
 
+            iname_dep = result.setdefault(iname, set())
+
             for dep_insn_id in insn.insn_deps:
+                if dep_insn_id in iname_dep:
+                    # already depending, nothing to check
+                    continue
+
                 dep_insn = kernel.id_to_insn[dep_insn_id]
                 dep_insn_inames = kernel.insn_inames(dep_insn)
 
@@ -235,22 +267,43 @@ def find_loop_insn_dep_map(kernel, loop_nest_map):
                     continue
 
                 # To make sure dep_insn belongs outside of iname, we must prove
-                # (via loop_nest_map) that all inames that dep_insn will be
-                # executed inside are nested *around* iname.
-                if not dep_insn_inames <= loop_nest_map[iname]:
+                # that all inames that dep_insn will be executed in nest
+                # outside of the loop over *iname*. (i.e. nested around, or
+                # before).
+
+                may_add_to_loop_dep_map = True
+                for dep_insn_iname in dep_insn_inames:
+                    if dep_insn_iname in loop_nest_around_map[iname]:
+                        # dep_insn_iname is guaranteed to nest outside of iname
+                        # -> safe.
+                        continue
+
+                    tag = kernel.iname_to_tag.get(dep_insn_iname)
+                    if isinstance(tag, (ParallelTag, IlpBaseTag, VectorizeTag)):
+                        # Parallel tags don't really nest, so we'll disregard
+                        # them here.
+                        continue
+
+                    if dep_insn_iname not in loop_nest_with_map.get(iname, []):
+                        # dep_insn_iname does not nest with iname, so its nest
+                        # must occur outside.
+                        continue
+
+                    may_add_to_loop_dep_map = False
+                    break
+
+                if not may_add_to_loop_dep_map:
                     continue
 
-                iname_dep = result.setdefault(iname, set())
-                if dep_insn_id not in iname_dep:
-                    logger.debug("{knl}: loop dependency map: iname '{iname}' "
-                            "depends on '{dep_insn}' via '{insn}'"
-                            .format(
-                                knl=kernel.name,
-                                iname=iname,
-                                dep_insn=dep_insn_id,
-                                insn=insn.id))
+                logger.debug("{knl}: loop dependency map: iname '{iname}' "
+                        "depends on '{dep_insn}' via '{insn}'"
+                        .format(
+                            knl=kernel.name,
+                            iname=iname,
+                            dep_insn=dep_insn_id,
+                            insn=insn.id))
 
-                    iname_dep.add(dep_insn_id)
+                iname_dep.add(dep_insn_id)
 
     return result
 
@@ -269,21 +322,34 @@ def group_insn_counts(kernel):
 
 # {{{ debug help
 
-def dump_schedule(schedule):
-    entries = []
+def dump_schedule(kernel, schedule):
+    lines = []
+    indent = ""
+
+    from loopy.kernel.data import ExpressionInstruction
     for sched_item in schedule:
         if isinstance(sched_item, EnterLoop):
-            entries.append("<%s>" % sched_item.iname)
+            lines.append(indent + "LOOP %s" % sched_item.iname)
+            indent += "    "
         elif isinstance(sched_item, LeaveLoop):
-            entries.append("</%s>" % sched_item.iname)
+            indent = indent[:-4]
+            lines.append(indent + "ENDLOOP %s" % sched_item.iname)
         elif isinstance(sched_item, RunInstruction):
-            entries.append(sched_item.insn_id)
+            insn = kernel.id_to_insn[sched_item.insn_id]
+            if isinstance(insn, ExpressionInstruction):
+                insn_str = "[%s] %s <- %s" % (
+                        insn.id, str(insn.assignee), str(insn.expression))
+            else:
+                insn_str = sched_item.insn_id
+            lines.append(indent + insn_str)
         elif isinstance(sched_item, Barrier):
-            entries.append("|")
+            lines.append(indent + "---BARRIER---")
         else:
             assert False
 
-    return " ".join(entries)
+    return "\n".join(
+            "% 4d: %s" % (i, line)
+            for i, line in enumerate(lines))
 
 
 class ScheduleDebugger:
@@ -343,6 +409,11 @@ class ScheduleDebugger:
     def start(self):
         from time import time
         self.start_time = time()
+
+
+class ScheduleDebugInput(Exception):
+    pass
+
 # }}}
 
 
@@ -352,11 +423,11 @@ class SchedulerState(Record):
     """
     .. attribute:: kernel
 
-    .. attribute:: loop_nest_map
+    .. attribute:: loop_nest_around_map
 
     .. attribute:: loop_priority
 
-        See :func:`loop_nest_map`.
+        See :func:`loop_nest_around_map`.
 
     .. attribute:: breakable_inames
 
@@ -409,7 +480,7 @@ class SchedulerState(Record):
 
 
 def generate_loop_schedules_internal(
-        sched_state, allow_boost=False, allow_insn=False, debug=None):
+        sched_state, allow_boost=False, debug=None):
     # allow_insn is set to False initially and after entering each loop
     # to give loops containing high-priority instructions a chance.
 
@@ -439,18 +510,16 @@ def generate_loop_schedules_internal(
         print(kernel)
         print(75*"=")
         print("CURRENT SCHEDULE:")
-        print("%s (length: %d)" % (
-            dump_schedule(sched_state.schedule),
-            len(sched_state.schedule)))
-        print("(LEGEND: entry into loop: <iname>, exit from loop: </iname>, "
-                "instructions w/ no delimiters)")
+        print(dump_schedule(sched_state.kernel, sched_state.schedule))
         #print("boost allowed:", allow_boost)
         print(75*"=")
         print("LOOP NEST MAP:")
-        for iname, val in six.iteritems(sched_state.loop_nest_map):
+        for iname, val in six.iteritems(sched_state.loop_nest_around_map):
             print("%s : %s" % (iname, ", ".join(val)))
         print(75*"=")
-        print("WHY IS THIS A DEAD-END SCHEDULE?")
+
+        if debug.debug_length == len(debug.longest_rejected_schedule):
+            print("WHY IS THIS A DEAD-END SCHEDULE?")
 
     #if len(schedule) == 2:
         #from pudb import set_trace; set_trace()
@@ -525,10 +594,10 @@ def generate_loop_schedules_internal(
 
         # }}}
 
-        if is_ready and allow_insn:
-            if debug_mode:
-                print("scheduling '%s'" % insn.id)
+        if is_ready and debug_mode:
+            print("ready to schedule '%s'" % insn.id)
 
+        if is_ready and not debug_mode:
             iid_set = frozenset([insn.id])
 
             # {{{ update active group counts for added instruction
@@ -565,8 +634,7 @@ def generate_loop_schedules_internal(
 
             for sub_sched in generate_loop_schedules_internal(
                     new_sched_state,
-                    allow_boost=rec_allow_boost, debug=debug,
-                    allow_insn=True):
+                    allow_boost=rec_allow_boost, debug=debug):
                 yield sub_sched
 
             if not sched_state.group_insn_counts:
@@ -618,15 +686,14 @@ def generate_loop_schedules_internal(
                             can_leave = True
                         break
 
-            if can_leave:
+            if can_leave and not debug_mode:
                 for sub_sched in generate_loop_schedules_internal(
                         sched_state.copy(
                             schedule=(
                                 sched_state.schedule
                                 + (LeaveLoop(iname=last_entered_loop),)),
                             active_inames=sched_state.active_inames[:-1]),
-                        allow_boost=rec_allow_boost, debug=debug,
-                        allow_insn=allow_insn):
+                        allow_boost=rec_allow_boost, debug=debug):
                     yield sub_sched
 
                 return
@@ -667,9 +734,11 @@ def generate_loop_schedules_internal(
 
             currently_accessible_inames = (
                     active_inames_set | sched_state.parallel_inames)
-            if not sched_state.loop_nest_map[iname] <= currently_accessible_inames:
+            if (
+                    not sched_state.loop_nest_around_map[iname]
+                    <= currently_accessible_inames):
                 if debug_mode:
-                    print("scheduling %s prohibited by loop nest map" % iname)
+                    print("scheduling %s prohibited by loop nest-around map" % iname)
                 continue
 
             if (
@@ -694,7 +763,7 @@ def generate_loop_schedules_internal(
                     iname_home_domain.get_var_names(dim_type.param))
 
             # The previous check should have ensured this is true, because
-            # the loop_nest_map takes the domain dependency graph into
+            # the loop_nest_around_map takes the domain dependency graph into
             # consideration.
             assert (iname_home_domain_params & kernel.all_inames()
                     <= currently_accessible_inames)
@@ -790,37 +859,42 @@ def generate_loop_schedules_internal(
 
         if debug_mode:
             print("useful inames: %s" % ",".join(useful_loops_set))
-
-        for tier in priority_tiers:
-            found_viable_schedule = False
-
-            for iname in sorted(tier,
-                    key=lambda iname: iname_to_usefulness.get(iname, 0),
-                    reverse=True):
-
-                for sub_sched in generate_loop_schedules_internal(
-                        sched_state.copy(
-                            schedule=(
-                                sched_state.schedule
-                                + (EnterLoop(iname=iname),)),
-                            active_inames=(
-                                sched_state.active_inames + (iname,)),
-                            entered_inames=(
-                                sched_state.entered_inames | frozenset((iname,))),
-                            ),
-                        allow_boost=rec_allow_boost,
-                        debug=debug):
-                    found_viable_schedule = True
-                    yield sub_sched
-
-            if found_viable_schedule:
-                return
+        else:
+            for tier in priority_tiers:
+                found_viable_schedule = False
+
+                for iname in sorted(tier,
+                        key=lambda iname: iname_to_usefulness.get(iname, 0),
+                        reverse=True):
+
+                    for sub_sched in generate_loop_schedules_internal(
+                            sched_state.copy(
+                                schedule=(
+                                    sched_state.schedule
+                                    + (EnterLoop(iname=iname),)),
+                                active_inames=(
+                                    sched_state.active_inames + (iname,)),
+                                entered_inames=(
+                                    sched_state.entered_inames
+                                    | frozenset((iname,))),
+                                ),
+                            allow_boost=rec_allow_boost,
+                            debug=debug):
+                        found_viable_schedule = True
+                        yield sub_sched
+
+                if found_viable_schedule:
+                    return
 
     # }}}
 
     if debug_mode:
         print(75*"=")
-        six.moves.input("Hit Enter for next schedule:")
+        inp = six.moves.input("Hit Enter for next schedule, "
+                "or enter a number to examine schedules of a "
+                "different length:")
+        if inp:
+            raise ScheduleDebugInput(inp)
 
     if not sched_state.active_inames and not sched_state.unscheduled_insn_ids:
         # if done, yield result
@@ -829,20 +903,11 @@ def generate_loop_schedules_internal(
         yield sched_state.schedule
 
     else:
-        if not allow_insn:
-            # try again with boosting allowed
-            for sub_sched in generate_loop_schedules_internal(
-                    sched_state,
-                    allow_boost=allow_boost, debug=debug,
-                    allow_insn=True):
-                yield sub_sched
-
         if not allow_boost and allow_boost is not None:
             # try again with boosting allowed
             for sub_sched in generate_loop_schedules_internal(
                     sched_state,
-                    allow_boost=True, debug=debug,
-                    allow_insn=allow_insn):
+                    allow_boost=True, debug=debug):
                 yield sub_sched
         else:
             # dead end
@@ -1243,11 +1308,15 @@ def generate_loop_schedules(kernel, debug_args={}):
             iname for iname in kernel.all_inames()
             if isinstance(kernel.iname_to_tag.get(iname), ParallelTag))
 
-    loop_nest_map = find_loop_nest_map(kernel)
+    loop_nest_with_map = find_loop_nest_with_map(kernel)
+    loop_nest_around_map = find_loop_nest_around_map(kernel)
     sched_state = SchedulerState(
             kernel=kernel,
-            loop_nest_map=loop_nest_map,
-            loop_insn_dep_map=find_loop_insn_dep_map(kernel, loop_nest_map),
+            loop_nest_around_map=loop_nest_around_map,
+            loop_insn_dep_map=find_loop_insn_dep_map(
+                kernel,
+                loop_nest_with_map=loop_nest_with_map,
+                loop_nest_around_map=loop_nest_around_map),
             breakable_inames=ilp_inames,
             ilp_inames=ilp_inames,
             vec_inames=vec_inames,
@@ -1289,9 +1358,17 @@ def generate_loop_schedules(kernel, debug_args={}):
             print()
 
             debug.debug_length = len(debug.longest_rejected_schedule)
-            for _ in generate_loop_schedules_internal(sched_state,
-                    debug=debug):
-                pass
+            while True:
+                try:
+                    for _ in generate_loop_schedules_internal(sched_state,
+                            debug=debug):
+                        pass
+
+                except ScheduleDebugInput as e:
+                    debug.debug_length = int(str(e))
+                    continue
+
+                break
 
     try:
         for gen in generators:
@@ -1327,6 +1404,7 @@ def generate_loop_schedules(kernel, debug_args={}):
                 break
 
     except KeyboardInterrupt:
+        print()
         print(75*"-")
         print("Interrupted during scheduling")
         print(75*"-")
diff --git a/setup.cfg b/setup.cfg
index d3f13a0e64b79c00a957cb1369e335e0b8a00d76..2dc94705f792ddca37ca7bf1f8ddec67be7ed3b9 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,3 @@
 [flake8]
-ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802
+ignore = E126,E127,E128,E123,E226,E241,E242,E265,N802,W503
 max-line-length=85
diff --git a/test/test_loopy.py b/test/test_loopy.py
index 7cad3504859d199c0581c8d3248ebafe50a34c4a..d7d7dc5768f163a84306ef234af08cf620c4066c 100644
--- a/test/test_loopy.py
+++ b/test/test_loopy.py
@@ -2174,6 +2174,23 @@ def test_to_batched(ctx_factory):
     bknl(queue, a=a, x=x)
 
 
+def test_variable_size_temporary():
+    knl = lp.make_kernel(
+         ''' { [i,j]: 0<=i,j<n } ''',
+         ''' out[i] = sum(j, a[i,j])''')
+
+    knl = lp.add_and_infer_dtypes(knl, {"a": np.float32})
+
+    knl = lp.add_prefetch(
+            knl, "a[:,:]", default_tag=None)
+
+    # Make sure that code generation succeeds even if
+    # there are variable-length arrays.
+    knl = lp.preprocess_kernel(knl)
+    for k in lp.generate_loop_schedules(knl):
+        lp.generate_code(k)
+
+
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         exec(sys.argv[1])