diff --git a/MEMO b/MEMO
index 1a76b61e040098083629219715aade111f56ad0a..675b8f05793c5b57f6200f753042430b08072434 100644
--- a/MEMO
+++ b/MEMO
@@ -62,8 +62,6 @@ Things to consider
 TODO
 ^^^^
 
-- Make axpy better.
-
 - implemented_domain may end up being smaller than requested in cse
   evaluations--check that!
 
@@ -90,6 +88,10 @@ TODO
 Dealt with
 ^^^^^^^^^^
 
+- Allow prioritization of loops in scheduling.
+
+- Make axpy better.
+
 - Screwy lower bounds in slab decomposition
 
 - reimplement add_prefetch
diff --git a/loopy/schedule.py b/loopy/schedule.py
index cf146782bf59945a857673aef566cfee67918f40..c7e3d57a2d349cbfde2cab0557ac0e078f3b7606 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -600,7 +600,7 @@ def assign_automatic_axes(kernel, only_axis_0=True):
 
 # {{{ scheduling algorithm
 
-def generate_loop_schedules_internal(kernel, schedule=[]):
+def generate_loop_schedules_internal(kernel, loop_priority, schedule=[]):
     all_insn_ids = set(insn.id for insn in kernel.instructions)
 
     scheduled_insn_ids = set(sched_item.insn_id for sched_item in schedule
@@ -682,7 +682,7 @@ def generate_loop_schedules_internal(kernel, schedule=[]):
             )
 
     if available_loops:
-        found_something_useful = False
+        useful_loops = []
 
         for iname in available_loops:
             # {{{ determine if that gets us closer to being able to scheduling an insn
@@ -699,16 +699,26 @@ def generate_loop_schedules_internal(kernel, schedule=[]):
             if not useful:
                 continue
 
-            found_something_useful = True
+            useful_loops.append(iname)
 
             # }}}
 
+        useful_and_desired = set(useful_loops) & set(loop_priority)
+        if useful_and_desired:
+            # keep only the useful loop that comes first in loop_priority
+
+            for iname in loop_priority:
+                if iname in useful_and_desired:
+                    useful_loops = [iname]
+                    break
+
+        for iname in useful_loops:
             new_schedule = schedule + [EnterLoop(iname=iname)]
             for sub_sched in generate_loop_schedules_internal(
-                    kernel, new_schedule):
+                    kernel, loop_priority, new_schedule):
                 yield sub_sched
 
-        if found_something_useful:
+        if useful_loops:
             return
 
     # }}}
@@ -735,7 +745,8 @@ def generate_loop_schedules_internal(kernel, schedule=[]):
     else:
         # if not done, but made some progress--try from the top
         if made_progress:
-            for sub_sched in generate_loop_schedules_internal(kernel, schedule):
+            for sub_sched in generate_loop_schedules_internal(
+                    kernel, loop_priority, schedule):
                 yield sub_sched
 
 # }}}
@@ -843,7 +854,7 @@ def insert_barriers(kernel, schedule, level=0):
 
 # {{{ main scheduling entrypoint
 
-def generate_loop_schedules(kernel):
+def generate_loop_schedules(kernel, loop_priority=[]):
     kernel = realize_reduction(kernel)
 
     # {{{ check that all CSEs have been realized
@@ -867,7 +878,7 @@ def generate_loop_schedules(kernel):
 
     schedule_count = 0
 
-    for gen_sched in generate_loop_schedules_internal(kernel):
+    for gen_sched in generate_loop_schedules_internal(kernel, loop_priority):
         gen_sched, owed_barriers = insert_barriers(kernel, gen_sched)
         assert not owed_barriers
 
diff --git a/test/test_matmul.py b/test/test_matmul.py
index b0b3b123a33920d549c477cabfc96ae5c4f2ef4b..0ed5f12878efbe0daa642eef14d3990da06b9a1b 100644
--- a/test/test_matmul.py
+++ b/test/test_matmul.py
@@ -131,7 +131,8 @@ def test_axpy(ctx_factory):
     refsol = (2*a+3*b).get()
 
     for variant in [variant_cpu, variant_gpu]:
-        kernel_gen = lp.generate_loop_schedules(variant(knl))
+        kernel_gen = lp.generate_loop_schedules(variant(knl),
+                loop_priority=["i_inner_outer"])
         kernel_gen = lp.check_kernels(kernel_gen, dict(n=n), kill_level_min=5)
 
         def launcher(kernel, gsize, lsize, check):