diff --git a/loopy/schedule.py b/loopy/schedule.py
index de2cb928e3fa888889144519e2906f0e3c13323a..7afd6bb87df0400ac3f9ef409d0af8cad70aefce 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -23,8 +23,7 @@ class Barrier(Record):
 
 # }}}
 
-
-
+# {{{ rewrite reduction to imperative form
 
 def realize_reduction(kernel, inames=None, reduction_tag=None):
     new_insns = []
@@ -97,8 +96,95 @@ def realize_reduction(kernel, inames=None, reduction_tag=None):
             instructions=new_insns,
             temporary_variables=new_temporary_variables)
 
+# }}}
 
+# {{{ schedule utilities
 
+def gather_schedule_subloop(schedule, start_idx):
+    assert isinstance(schedule[start_idx], EnterLoop)
+    level = 0
+
+    i = start_idx
+    while i < len(schedule):
+        if isinstance(schedule[i], EnterLoop):
+            level += 1
+        if isinstance(schedule[i], LeaveLoop):
+            level -= 1
+
+            if level == 0:
+                return schedule[start_idx:i+1], i+1
+
+        i += 1
+
+    assert False
+
+
+
+def has_dependent_in_schedule(kernel, insn_id, schedule):
+    from pytools import any
+    return any(sched_item
+            for sched_item in schedule
+            if isinstance(sched_item, RunInstruction)
+            and kernel.id_to_insn[sched_item.insn_id].insn_deps)
+
+
+
+
+def find_active_inames_at(kernel, sched_index):
+    active_inames = []
+
+    from loopy.schedule import EnterLoop, LeaveLoop
+    for sched_item in kernel.schedule[:sched_index]:
+        if isinstance(sched_item, EnterLoop):
+            active_inames.append(sched_item.iname)
+        if isinstance(sched_item, LeaveLoop):
+            active_inames.pop()
+
+    return set(active_inames)
+
+
+
+
+def has_barrier_within(kernel, sched_index):
+    sched_item = kernel.schedule[sched_index]
+
+    if isinstance(sched_item, EnterLoop):
+        loop_contents, _ = gather_schedule_subloop(
+                kernel.schedule, sched_index)
+        from pytools import any
+        return any(isinstance(subsched_item, Barrier)
+                for subsched_item in loop_contents)
+    elif isinstance(sched_item, Barrier):
+        return True
+    else:
+        return False
+
+
+
+
+def find_used_inames_within(kernel, sched_index):
+    sched_item = kernel.schedule[sched_index]
+
+    if isinstance(sched_item, EnterLoop):
+        loop_contents, _ = gather_schedule_subloop(
+                kernel.schedule, sched_index)
+        run_insns = [subsched_item
+                for subsched_item in loop_contents
+                if isinstance(subsched_item, RunInstruction)]
+    elif isinstance(sched_item, RunInstruction):
+        run_insns = [sched_item]
+    else:
+        return set()
+
+    result = set()
+    for sched_item in run_insns:
+        result.update(kernel.id_to_insn[sched_item.insn_id].all_inames())
+
+    return result
+
+# }}}
+
+# {{{ hw axis sanity checks
 
 def check_for_unused_hw_axes(kernel):
     group_size, local_size = kernel.get_grid_sizes_as_exprs()
@@ -145,8 +231,9 @@ def check_for_double_use_of_hw_axes(kernel):
 
                 insn_tag_keys.add(key)
 
+# }}}
 
-
+# {{{ temp storage adjust for bank conflict
 
 def adjust_local_temp_var_storage(kernel):
     new_temp_vars = {}
@@ -212,8 +299,9 @@ def adjust_local_temp_var_storage(kernel):
 
     return kernel.copy(temporary_variables=new_temp_vars)
 
+# }}}
 
-
+# {{{ automatic dependencies
 
 def find_writers(kernel):
     """
@@ -239,6 +327,7 @@ def find_writers(kernel):
 
 
 
+
 def add_automatic_dependencies(kernel):
     writer_map = find_writers(kernel)
 
@@ -278,8 +367,9 @@ def add_automatic_dependencies(kernel):
 
     return kernel.copy(instructions=new_insns)
 
+# }}}
 
-
+# {{{ guess good iname for local axis 0
 
 def guess_good_iname_for_axis_0(kernel, insn):
     from loopy.kernel import ImageArg, ScalarArg
@@ -369,9 +459,9 @@ def guess_good_iname_for_axis_0(kernel, insn):
 
     # }}}
 
+# }}}
 
-
-
+# {{{ assign automatic axes
 
 def assign_automatic_axes(kernel, only_axis_0=True):
     from loopy.kernel import (AutoLocalIndexTagBase, LocalIndexTag,
@@ -381,7 +471,6 @@ def assign_automatic_axes(kernel, only_axis_0=True):
             ignore_auto=True)
 
     def assign_axis(iname, axis=None):
-        print "assign", iname
         desired_length = kernel.get_constant_iname_length(iname)
 
         if axis is None:
@@ -417,7 +506,6 @@ def assign_automatic_axes(kernel, only_axis_0=True):
             new_tag = None
         else:
             new_tag = LocalIndexTag(axis)
-            print iname, desired_length, local_size[axis]
             if desired_length > local_size[axis]:
                 from loopy import split_dimension
                 return assign_automatic_axes(
@@ -477,8 +565,9 @@ def assign_automatic_axes(kernel, only_axis_0=True):
         # All automatic axes are assigned.
         return kernel
 
+# }}}
 
-
+# {{{ scheduling algorithm
 
 def generate_loop_schedules_internal(kernel, schedule=[]):
     all_insn_ids = set(insn.id for insn in kernel.instructions)
@@ -596,38 +685,9 @@ def generate_loop_schedules_internal(kernel, schedule=[]):
             for sub_sched in generate_loop_schedules_internal(kernel, schedule):
                 yield sub_sched
 
+# }}}
 
-
-
-def gather_schedule_subloop(schedule, start_idx):
-    assert isinstance(schedule[start_idx], EnterLoop)
-    level = 0
-
-    i = start_idx
-    while i < len(schedule):
-        if isinstance(schedule[i], EnterLoop):
-            level += 1
-        if isinstance(schedule[i], LeaveLoop):
-            level -= 1
-
-            if level == 0:
-                return schedule[start_idx:i+1], i+1
-
-        i += 1
-
-    assert False
-
-
-
-def has_dependent_in_schedule(kernel, insn_id, schedule):
-    from pytools import any
-    return any(sched_item
-            for sched_item in schedule
-            if isinstance(sched_item, RunInstruction)
-            and kernel.id_to_insn[sched_item.insn_id].insn_deps)
-
-
-
+# {{{ barrier insertion
 
 def insert_barriers(kernel, schedule, level=0):
     result = []
@@ -664,6 +724,8 @@ def insert_barriers(kernel, schedule, level=0):
 
             # {{{ issue dependency-based barriers for contents of nested loop
 
+            # (i.e. if anything *in* the loop depends on something beforehand)
+
             for insn_id in owed_barriers:
                 if has_dependent_in_schedule(kernel, insn_id, subloop):
                     issue_barrier(is_pre_barrier=False)
@@ -676,7 +738,7 @@ def insert_barriers(kernel, schedule, level=0):
 
             # {{{ issue pre-barriers for contents of nested loop
 
-            if not loop_had_barrier:
+            if not loop_had_barrier[0]:
                 for insn_id in sub_owed_barriers:
                     if has_dependent_in_schedule(
                             kernel, insn_id, schedule):
@@ -724,8 +786,9 @@ def insert_barriers(kernel, schedule, level=0):
 
     return result, owed_barriers
 
+# }}}
 
-
+# {{{ main scheduling entrypoint
 
 def generate_loop_schedules(kernel):
     kernel = realize_reduction(kernel)
@@ -746,7 +809,6 @@ def generate_loop_schedules(kernel):
     kernel = add_automatic_dependencies(kernel)
     kernel = adjust_local_temp_var_storage(kernel)
 
-    print kernel
     check_for_double_use_of_hw_axes(kernel)
     check_for_unused_hw_axes(kernel)
 
@@ -756,66 +818,10 @@ def generate_loop_schedules(kernel):
 
         yield kernel.copy(schedule=gen_sched)
 
-
-
-
-# {{{ schedule utilities
-
-def find_active_inames_at(kernel, sched_index):
-    active_inames = []
-
-    from loopy.schedule import EnterLoop, LeaveLoop
-    for sched_item in kernel.schedule[:sched_index]:
-        if isinstance(sched_item, EnterLoop):
-            active_inames.append(sched_item.iname)
-        if isinstance(sched_item, LeaveLoop):
-            active_inames.pop()
-
-    return set(active_inames)
-
-
-
-
-def has_barrier_within(kernel, sched_index):
-    sched_item = kernel.schedule[sched_index]
-
-    if isinstance(sched_item, EnterLoop):
-        loop_contents, _ = gather_schedule_subloop(
-                kernel.schedule, sched_index)
-        from pytools import any
-        return any(isinstance(subsched_item, Barrier)
-                for subsched_item in loop_contents)
-    elif isinstance(sched_item, Barrier):
-        return True
-    else:
-        return False
-
-
-
-
-def find_used_inames_within(kernel, sched_index):
-    sched_item = kernel.schedule[sched_index]
-
-    if isinstance(sched_item, EnterLoop):
-        loop_contents, _ = gather_schedule_subloop(
-                kernel.schedule, sched_index)
-        run_insns = [subsched_item
-                for subsched_item in loop_contents
-                if isinstance(subsched_item, RunInstruction)]
-    elif isinstance(sched_item, RunInstruction):
-        run_insns = [sched_item]
-    else:
-        return set()
-
-    result = set()
-    for sched_item in run_insns:
-        result.update(kernel.id_to_insn[sched_item.insn_id].all_inames())
-
-    return result
-
 # }}}
 
 
 
 
+
 # vim: foldmethod=marker