diff --git a/MEMO b/MEMO
index f09d763150d2b0f7750e37b908e7f5ff66ab3075..3111326d31a33b35639a1dfbb9b564d610da858c 100644
--- a/MEMO
+++ b/MEMO
@@ -39,6 +39,8 @@ Things to consider
 To-do
 ^^^^^
 
+- CSE should be more like variable assignment
+
 - dim_max caching
 
 - Fix all tests
@@ -54,6 +56,7 @@ Future ideas
 - Float4 joining on fetch/store?
 
 - How can one automatically generate something like microblocks?
+  -> Some sort of axis-adding transform?
 
 - Better for loop bound generation
   -> Try a triangular loop
@@ -85,6 +88,9 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- Exhaust the search for a no-boost solution first, before looking
+  for a schedule with boosts.
+
 - Pick not just axis 0, but all axes by lowest available stride
 
 - Scheduler tries too many boostability-related options
diff --git a/loopy/schedule.py b/loopy/schedule.py
index 67ffbb645c257f4f0ec28aea4e3c7a80dfb1ab9b..0b1fb08fbe8d82a11f3f03d703e8a76eb03206b3 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -232,6 +232,11 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
     scheduled_insn_ids = set(sched_item.insn_id for sched_item in schedule
             if isinstance(sched_item, RunInstruction))
 
+    if allow_boost is None:
+        rec_allow_boost = None
+    else:
+        rec_allow_boost = False
+
     # {{{ find active and entered loops
 
     active_inames = []
@@ -441,8 +446,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
 
             for iname in tier:
                 new_schedule = schedule + [EnterLoop(iname=iname)]
+
                 for sub_sched in generate_loop_schedules_internal(
                         kernel, loop_priority, new_schedule,
+                        allow_boost=rec_allow_boost,
                         debug=debug):
                     found_viable_schedule = True
                     yield sub_sched
@@ -465,10 +472,10 @@ def generate_loop_schedules_internal(kernel, loop_priority, schedule=[], allow_b
         # if not done, but made some progress--try from the top
         for sub_sched in generate_loop_schedules_internal(
                 kernel, loop_priority, schedule,
-                debug=debug):
+                allow_boost=rec_allow_boost, debug=debug):
             yield sub_sched
     else:
-        if not allow_boost:
+        if not allow_boost and allow_boost is not None:
             # try again with boosting allowed
             for sub_sched in generate_loop_schedules_internal(
                     kernel, loop_priority, schedule=schedule,
@@ -619,22 +626,31 @@ def generate_loop_schedules(kernel, loop_priority=[], debug=None):
 
     debug = SchedulerDebugger(debug)
 
-    for gen_sched in generate_loop_schedules_internal(kernel, loop_priority,
-            debug=debug):
-        gen_sched, owed_barriers = insert_barriers(kernel, gen_sched)
-        if owed_barriers:
-            from warnings import warn
-            from loopy import LoopyAdvisory
-            warn("Barrier insertion finished without inserting barriers for "
-                    "local memory writes in these instructions: '%s'. "
-                    "This often means that local memory was "
-                    "written, but never read." % ",".join(owed_barriers), LoopyAdvisory)
-
-        debug.stop()
-        yield kernel.copy(schedule=gen_sched)
-        debug.start()
-
-        schedule_count += 1
+    generators = [
+            generate_loop_schedules_internal(kernel, loop_priority,
+                debug=debug, allow_boost=None),
+            generate_loop_schedules_internal(kernel, loop_priority,
+                debug=debug)]
+    for gen in generators:
+        for gen_sched in gen:
+            gen_sched, owed_barriers = insert_barriers(kernel, gen_sched)
+            if owed_barriers:
+                from warnings import warn
+                from loopy import LoopyAdvisory
+                warn("Barrier insertion finished without inserting barriers for "
+                        "local memory writes in these instructions: '%s'. "
+                        "This often means that local memory was "
+                        "written, but never read." % ",".join(owed_barriers), LoopyAdvisory)
+
+            debug.stop()
+            yield kernel.copy(schedule=gen_sched)
+            debug.start()
+
+            schedule_count += 1
+
+        # if the no-boost pass yielded any schedule, skip the boost-enabled pass
+        if schedule_count:
+            break
 
     debug.done_scheduling()
 
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 07fcde54bc5771f2396e8b6c01ed5c95201977a6..f5d9c2f00e42a78faee6396bde0101d7bb1c1c35 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -368,8 +368,7 @@ def test_rank_one(ctx_factory):
 
     seq_knl = knl
 
-    #for variant in [variant_1, variant_2, variant_4]:
-    for variant in [variant_4]:
+    for variant in [variant_1, variant_2, variant_4]:
         kernel_gen = lp.generate_loop_schedules(variant(knl))
         kernel_gen = lp.check_kernels(kernel_gen, dict(n=n))