diff --git a/examples/python/feature_usage/constrain_loop_nesting.py b/examples/python/feature_usage/constrain_loop_nesting.py new file mode 100644 index 0000000000000000000000000000000000000000..c42b12eafd41aae3237902d8d4ee596de44fbdac --- /dev/null +++ b/examples/python/feature_usage/constrain_loop_nesting.py @@ -0,0 +1,69 @@ +import numpy as np +import loopy as lp +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + +ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k safe. continue + # if dep_insn_iname is concurrent, continue + # (parallel tags don't really nest, so disregard them here) if kernel.iname_tags_of_type(dep_insn_iname, (ConcurrentTag, IlpBaseTag)): - # Parallel tags don't really nest, so we'll disregard - # them here. continue + # if loop_nest_with_map says dep_insn_iname does not nest + # inside or around iname, it must be nested separately; + # we're safe, so continue if dep_insn_iname not in loop_nest_with_map.get(iname, []): - # dep_insn_iname does not nest with iname, so its nest - # must occur outside. continue + # if at least one of these three cases succeeds for every + # dep_insn_iname, we can add dep_insn to iname's set of insns + # in result dict, otherwise we cannot + may_add_to_loop_dep_map = False break @@ -334,6 +363,8 @@ def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): dep_insn=dep_insn_id, insn=insn.id)) + # add dep_insn to result[iname] + # (means dep_insn must be scheduled before entering iname loop) iname_dep.add(dep_insn_id) return result @@ -349,16 +380,17 @@ def group_insn_counts(kernel): return result -def gen_dependencies_except(kernel, insn_id, except_insn_ids): - insn = kernel.id_to_insn[insn_id] - for dep_id in insn.depends_on: +def gen_dependencies_except(kernel, insn_id, except_insn_ids, + insn_depends_on_graph): + for dep_id in insn_depends_on_graph.get(insn_id, set()): if dep_id in except_insn_ids: continue yield dep_id - for sub_dep_id in gen_dependencies_except(kernel, dep_id, except_insn_ids): + for sub_dep_id in gen_dependencies_except(kernel, dep_id, + except_insn_ids, insn_depends_on_graph): yield sub_dep_id @@ -642,9 +674,10 @@ class SchedulerState(ImmutableRecord): Used to produce warnings about deprecated 'boosting' behavior Should be removed along with boostability in 2017.x. """ + # TODO document insn_depends_on_graph @property - def last_entered_loop(self): + def deepest_active_iname(self): if self.active_inames: return self.active_inames[-1] else: @@ -652,25 +685,36 @@ class SchedulerState(ImmutableRecord): def generate_loop_schedules_internal( - sched_state, allow_boost=False, debug=None): + sched_state, allow_boost=False, debug=None, _depth_ctr=0): + # TODO remove _depth_ctr (just here for debugging) + #_print_depth_ctr = True + _print_depth_ctr = False + # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. 
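# An illustrative, runnable sketch (toy data, hypothetical ids; not part of
# loopy's API) of the traversal pattern gen_dependencies_except uses above
# once dependencies live in an explicit insn_depends_on_graph adjacency dict
# rather than on insn.depends_on:

def _walk_deps_sketch(insn_id, except_ids, dep_graph):
    # Yield direct and transitive dependencies of insn_id, skipping
    # (and not descending past) anything in except_ids.
    for dep_id in dep_graph.get(insn_id, set()):
        if dep_id in except_ids:
            continue
        yield dep_id
        yield from _walk_deps_sketch(dep_id, except_ids, dep_graph)

_toy_graph = {"c": {"b"}, "b": {"a"}}
assert set(_walk_deps_sketch("c", set(), _toy_graph)) == {"a", "b"}
assert set(_walk_deps_sketch("c", {"b"}, _toy_graph)) == set()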
kernel = sched_state.kernel Fore = kernel.options._fore # noqa Style = kernel.options._style # noqa + # TODO ignore boost for now + # {{{ if allow_boost is None: rec_allow_boost = None else: rec_allow_boost = False + # }}} active_inames_set = frozenset(sched_state.active_inames) + # TODO ignore preschedule for now + # {{{ next_preschedule_item = ( sched_state.preschedule[0] if len(sched_state.preschedule) > 0 else None) + # }}} + # TODO ignore debug for now # {{{ decide about debug mode debug_mode = False @@ -708,6 +752,7 @@ def generate_loop_schedules_internal( # }}} + # TODO ignore preschedule for now # {{{ see if we have reached the start/end of kernel in the preschedule if isinstance(next_preschedule_item, CallKernel): @@ -739,12 +784,14 @@ def generate_loop_schedules_internal( # }}} + # TODO ignore preschedule for now # {{{ see if there are pending barriers in the preschedule # Barriers that do not have an originating instruction are handled here. # (These are automatically inserted by insert_barriers().) Barriers with # originating instructions are handled as part of normal instruction # scheduling below. + if ( isinstance(next_preschedule_item, Barrier) and next_preschedule_item.originating_insn_id is None): @@ -782,18 +829,23 @@ def generate_loop_schedules_internal( else: insn_ids_to_try = sched_state.insn_ids_to_try + # TODO ignore preschedule for now + # {{{ insn_ids_to_try.extend( insn_id for item in sched_state.preschedule for insn_id in sched_item_to_insn_id(item)) + # }}} for insn_id in insn_ids_to_try: insn = kernel.id_to_insn[insn_id] - is_ready = insn.depends_on <= sched_state.scheduled_insn_ids - + # make sure dependees have been scheduled + is_ready = sched_state.insn_depends_on_graph.get( + insn_id, set()) <= sched_state.scheduled_insn_ids if not is_ready: if debug_mode: + # debug message {{{ # These are not that interesting when understanding scheduler # failures. @@ -801,31 +853,42 @@ def generate_loop_schedules_internal( # format_insn(kernel, insn.id), ",".join( # insn.depends_on - sched_state.scheduled_insn_ids))) pass + # }}} continue - want = kernel.insn_inames(insn) - sched_state.parallel_inames - have = active_inames_set - sched_state.parallel_inames + nonconc_insn_inames = kernel.insn_inames(insn) - sched_state.parallel_inames + nonconc_active_inames = active_inames_set - sched_state.parallel_inames # If insn is boostable, it may be placed inside a more deeply # nested loop without harm. - orig_have = have + orig_nonconc_active_inames = nonconc_active_inames + # TODO ignore boost for now + # {{{ if allow_boost: # Note that the inames in 'insn.boostable_into' necessarily won't - # be contained in 'want'. - have = have - insn.boostable_into + # be contained in 'nonconc_insn_inames'. 
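# Toy illustration (hypothetical ids) of the is_ready test above: with
# dependencies kept in an adjacency dict, readiness is a plain subset
# check against the set of already-scheduled ids:
_dep_graph = {"store": {"compute"}, "compute": {"load"}}
_scheduled = {"load"}
assert _dep_graph.get("compute", set()) <= _scheduled       # ready
assert not _dep_graph.get("store", set()) <= _scheduled     # blocked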
+ nonconc_active_inames = nonconc_active_inames - insn.boostable_into + # }}} - if want != have: + if nonconc_insn_inames != nonconc_active_inames: + # We don't have the inames we need, may need to open more loops is_ready = False + # TODO ignore debug for now + # debug message {{{ if debug_mode: - if want-have: + if nonconc_insn_inames-nonconc_active_inames: print("instruction '%s' is missing inames '%s'" - % (format_insn(kernel, insn.id), ",".join(want-have))) - if have-want: + % (format_insn(kernel, insn.id), + ",".join(nonconc_insn_inames-nonconc_active_inames))) + if nonconc_active_inames-nonconc_insn_inames: print("instruction '%s' won't work under inames '%s'" - % (format_insn(kernel, insn.id), ",".join(have-want))) + % (format_insn(kernel, insn.id), + ",".join(nonconc_active_inames-nonconc_insn_inames))) + # }}} + # TODO ignore preschedule for now # {{{ check if scheduling this insn is compatible with preschedule if insn_id in sched_state.prescheduled_insn_ids: @@ -845,50 +908,106 @@ def generate_loop_schedules_internal( # }}} + # TODO ignoring global barriers for now + # {{{ if global barrier, is it allowed?, if not, we must be within subkernel + # to schedule insn (any kernel that does not have subkernels) + # {{{ check if scheduler state allows insn scheduling from loopy.kernel.instruction import BarrierInstruction + # TODO (?)could save some time by skipping ahead if we know is_ready=False if isinstance(insn, BarrierInstruction) and \ insn.synchronization_kind == "global": if not sched_state.may_schedule_global_barriers: + # debug message {{{ if debug_mode: print("can't schedule '%s' because global barriers are " "not currently allowed" % format_insn(kernel, insn.id)) + # }}} is_ready = False else: if not sched_state.within_subkernel: + # debug message {{{ if debug_mode: print("can't schedule '%s' because not within subkernel" % format_insn(kernel, insn.id)) + # }}} is_ready = False - + # }}} # }}} + # TODO ignore insn groups for now # {{{ determine group-based readiness if insn.conflicts_with_groups & active_groups: is_ready = False + # debug message {{{ if debug_mode: print("instruction '%s' conflicts with active group(s) '%s'" % (insn.id, ",".join( active_groups & insn.conflicts_with_groups))) + # }}} # }}} - # {{{ determine reachability + # {{{ determine reachability (no active inames conflict w/insn, but + # may need more inames) - if (not is_ready and have <= want): + if (not is_ready and nonconc_active_inames <= nonconc_insn_inames): + # no active inames conflict with insn, but we may need more active inames reachable_insn_ids.add(insn_id) # }}} + # {{{ is_ready debug message if is_ready and debug_mode: print("ready to schedule '%s'" % format_insn(kernel, insn.id)) + # }}} + + # {{{ check to see if adding insn_id violates dependencies 2.0 + + # REQUIRES schedule.checker (to be renamed to linearization.checker) + if is_ready: + from loopy.schedule.checker import check_linearization_validity + + # get IDs of insns that will have been scheduled if we schedule insn + # TODO (For now, ignoring barriers) + hypothetical_scheduled_ids = set( + [item.insn_id for item in sched_state.schedule + if isinstance(item, RunInstruction)] + + [insn.id, ]) + + # get subset of dependencies to check + # (deps s.t. 
before+after insn have been scheduled) + relevant_deps = set() + #for statement_pair_dep_set in kernel.dependencies: + for insn_id_before, insn_id_after, constraint_map in kernel.dependencies: + # TODO update after dep refactoring + if (insn_id_before in hypothetical_scheduled_ids + and insn_id_after in hypothetical_scheduled_ids): + relevant_deps.add( + (insn_id_before, insn_id_after, constraint_map)) + + # make sure currently scheduled items don't violate deps + if relevant_deps: + schedule_items = sched_state.schedule[:] + ( + RunInstruction(insn_id=insn.id), ) + sched_supports_deps = check_linearization_validity( + kernel, + relevant_deps, + schedule_items) + + if not sched_supports_deps: + is_ready = False + # }}} if is_ready and not debug_mode: + # schedule this instruction and recurse iid_set = frozenset([insn.id]) + # TODO ignore insn groups for now: + # new_active_group_counts = sched_state.active_group_counts # {{{ update active group counts for added instruction if insn.groups: @@ -914,17 +1033,23 @@ def generate_loop_schedules_internal( new_insn_ids_to_try.remove(insn.id) # invalidate instruction_ids_to_try when active group changes + # TODO ignore insn groups for now: + # {{{ if set(new_active_group_counts.keys()) != set( sched_state.active_group_counts.keys()): new_insn_ids_to_try = None + # }}} # }}} new_uses_of_boostability = [] + # TODO ignore boost for now + # {{{ if allow_boost: - if orig_have & insn.boostable_into: + if orig_nonconc_active_inames & insn.boostable_into: new_uses_of_boostability.append( - (insn.id, orig_have & insn.boostable_into)) + (insn.id, orig_nonconc_active_inames & insn.boostable_into)) + # }}} new_sched_state = sched_state.copy( scheduled_insn_ids=sched_state.scheduled_insn_ids | iid_set, @@ -957,44 +1082,59 @@ def generate_loop_schedules_internal( # }}} + # No insns are ready to be scheduled now, but some may be reachable + # reachable_insn_ids = no active inames conflict w/insn, but may need more inames + # {{{ see if we're ready to leave the innermost loop - last_entered_loop = sched_state.last_entered_loop + deepest_active_iname = sched_state.deepest_active_iname - if last_entered_loop is not None: + if deepest_active_iname is not None: can_leave = True + # TODO ignore preschedule for now + # {{{ if ( - last_entered_loop in sched_state.prescheduled_inames + deepest_active_iname in sched_state.prescheduled_inames and not ( isinstance(next_preschedule_item, LeaveLoop) - and next_preschedule_item.iname == last_entered_loop)): + and next_preschedule_item.iname == deepest_active_iname)): # A prescheduled loop can only be left if the preschedule agrees. + # debug message {{{ if debug_mode: print("cannot leave '%s' because of preschedule constraints" - % last_entered_loop) + % deepest_active_iname) + # }}} can_leave = False - elif last_entered_loop not in sched_state.breakable_inames: + # }}} + elif deepest_active_iname not in sched_state.breakable_inames: # If the iname is not breakable, then check that we've # scheduled all the instructions that require it. 
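# Toy version (hypothetical ids; constraint maps elided as None) of the
# relevant-deps filter above: a dependency triple only needs checking
# once both of its endpoints appear in the hypothetical schedule:
_deps = {("a", "b", None), ("b", "c", None)}
_hyp_scheduled = {"a", "b"}
_relevant = {(pre, post, m) for pre, post, m in _deps
        if pre in _hyp_scheduled and post in _hyp_scheduled}
assert _relevant == {("a", "b", None)}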
for insn_id in sched_state.unscheduled_insn_ids: insn = kernel.id_to_insn[insn_id] - if last_entered_loop in kernel.insn_inames(insn): + if deepest_active_iname in kernel.insn_inames(insn): + # cannot leave deepest_active_iname; insn still depends on it + # TODO ignore debug for now + # {{{ if debug_mode: print("cannot leave '%s' because '%s' still depends on it" - % (last_entered_loop, format_insn(kernel, insn.id))) + % (deepest_active_iname, format_insn(kernel, insn.id))) # check if there's a dependency of insn that needs to be - # outside of last_entered_loop. - for subdep_id in gen_dependencies_except(kernel, insn_id, - sched_state.scheduled_insn_ids): + # outside of deepest_active_iname. + for subdep_id in gen_dependencies_except( + kernel, insn_id, + sched_state.scheduled_insn_ids, + sched_state.insn_depends_on_graph): subdep = kernel.id_to_insn[insn_id] - want = (kernel.insn_inames(subdep_id) + nonconc_insn_inames = (kernel.insn_inames(subdep_id) - sched_state.parallel_inames) if ( - last_entered_loop not in want and - last_entered_loop not in subdep.boostable_into): + deepest_active_iname not in nonconc_insn_inames + and + deepest_active_iname not in subdep.boostable_into + ): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " @@ -1008,12 +1148,13 @@ def generate_loop_schedules_internal( % { "warn": Fore.RED + Style.BRIGHT, "reset_all": Style.RESET_ALL, - "iname": last_entered_loop, + "iname": deepest_active_iname, "subdep": format_insn_id(kernel, subdep_id), "dep": format_insn_id(kernel, insn_id), "subdep_i": format_insn(kernel, subdep_id), "dep_i": format_insn(kernel, insn_id), }) + # }}} can_leave = False break @@ -1035,22 +1176,52 @@ def generate_loop_schedules_internal( if ignore_count: ignore_count -= 1 else: - assert sched_item.iname == last_entered_loop + assert sched_item.iname == deepest_active_iname if seen_an_insn: can_leave = True break + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if can_leave: + must_nest_graph = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + + if must_nest_graph: + # get inames that must nest inside the current iname + must_nest_inside = must_nest_graph[deepest_active_iname] + + if must_nest_inside: + # get scheduled inames that are nested inside current iname + encountered_iname = False + actually_nested_inside = set() + for sched_item in sched_state.schedule: + if isinstance(sched_item, EnterLoop): + if encountered_iname: + actually_nested_inside.add(sched_item.iname) + elif sched_item.iname == deepest_active_iname: + encountered_iname = True + elif (isinstance(sched_item, LeaveLoop) and + sched_item.iname == deepest_active_iname): + break + + # don't leave if must_nest constraints require that + # additional inames be nested inside the current iname + if not must_nest_inside.issubset(actually_nested_inside): + can_leave = False + if can_leave and not debug_mode: for sub_sched in generate_loop_schedules_internal( sched_state.copy( schedule=( sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + + (LeaveLoop(iname=deepest_active_iname),)), active_inames=sched_state.active_inames[:-1], preschedule=( sched_state.preschedule - if last_entered_loop + if deepest_active_iname not in sched_state.prescheduled_inames else sched_state.preschedule[1:]), ), @@ -1061,23 +1232,28 @@ def generate_loop_schedules_internal( # }}} + # We're not ready to leave the innermost loop... 
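# Runnable sketch of the schedule scan above, with namedtuple stand-ins
# for loopy's EnterLoop/LeaveLoop items: collect the inames opened
# between EnterLoop(iname) and LeaveLoop(iname) so the must-nest check
# can compare them against must_nest_inside.
from collections import namedtuple
_Enter = namedtuple("_Enter", "iname")
_Leave = namedtuple("_Leave", "iname")

def _nested_inside_sketch(schedule, iname):
    seen_iname = False
    nested = set()
    for item in schedule:
        if isinstance(item, _Enter):
            if seen_iname:
                nested.add(item.iname)
            elif item.iname == iname:
                seen_iname = True
        elif isinstance(item, _Leave) and item.iname == iname:
            break
    return nested

_sched = [_Enter("i"), _Enter("j"), _Leave("j"), _Leave("i")]
assert _nested_inside_sketch(_sched, "i") == {"j"}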
+ # {{{ see if any loop can be entered now # Find inames that are being referenced by as yet unscheduled instructions. - needed_inames = set() + unsched_insn_inames_nonconc_still_needed = set() for insn_id in sched_state.unscheduled_insn_ids: - needed_inames.update(kernel.insn_inames(insn_id)) + unsched_insn_inames_nonconc_still_needed.update(kernel.insn_inames(insn_id)) - needed_inames = (needed_inames + unsched_insn_inames_nonconc_still_needed = ( + unsched_insn_inames_nonconc_still_needed # There's no notion of 'entering' a parallel loop - sched_state.parallel_inames - # Don't reenter a loop we're already in. - active_inames_set) + # {{{ debug msg + if debug_mode: print(75*"-") - print("inames still needed :", ",".join(needed_inames)) + print("inames still needed :", + ",".join(unsched_insn_inames_nonconc_still_needed)) print("active inames :", ",".join(sched_state.active_inames)) print("inames entered so far :", ",".join(sched_state.entered_inames)) print("reachable insns:", ",".join(reachable_insn_ids)) @@ -1086,13 +1262,24 @@ def generate_loop_schedules_internal( for grp, c in six.iteritems(sched_state.active_group_counts))) print(75*"-") - if needed_inames: + # }}} + + if unsched_insn_inames_nonconc_still_needed: iname_to_usefulness = {} - for iname in needed_inames: + currently_accessible_inames = ( + active_inames_set | sched_state.parallel_inames) + + for iname in unsched_insn_inames_nonconc_still_needed: - # {{{ check if scheduling this iname now is allowed/plausible + # check if scheduling this iname now is allowed/plausible based on + # preschedule constraints, loop_nest_around_map, + # loop insn dependency map, and data dependencies, + # if not, continue + # {{{ check if scheduling this iname now is allowed/plausible based on ^ + # TODO ignore preschedule for now + # {{{ if ( iname in sched_state.prescheduled_inames and not ( @@ -1103,18 +1290,28 @@ def generate_loop_schedules_internal( % iname) continue - currently_accessible_inames = ( - active_inames_set | sched_state.parallel_inames) + # }}} + + # check loop_nest_around_map to determine whether inames that must + # nest around iname are available + # {{{ if ( not sched_state.loop_nest_around_map[iname] <= currently_accessible_inames): if debug_mode: print("scheduling %s prohibited by loop nest-around map" % iname) continue + # }}} + # loop_insn_dep_map: dict mapping inames to other insn ids that need to + # be scheduled before the iname should be eligible for scheduling. + # {{{ if loop dependency map prohibits scheduling of iname, continue if ( not sched_state.loop_insn_dep_map.get(iname, set()) <= sched_state.scheduled_insn_ids): + # scheduling {iname} prohibited by loop dependency map + # (needs '{needed_insns})' + # debug message {{{ if debug_mode: print( "scheduling {iname} prohibited by loop dependency map " @@ -1125,8 +1322,10 @@ def generate_loop_schedules_internal( sched_state.loop_insn_dep_map.get(iname, set()) - sched_state.scheduled_insn_ids))) + # }}} continue + # }}} iname_home_domain = kernel.domains[kernel.get_home_domain_index(iname)] from islpy import dim_type @@ -1142,6 +1341,8 @@ def generate_loop_schedules_internal( # Check if any parameters are temporary variables, and if so, if their # writes have already been scheduled. 
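# Toy version (made-up insn ids and inames) of the "inames still needed"
# computation above: union the inames of all unscheduled insns, then
# drop concurrent inames (never "entered") and already-active inames:
_insn_inames = {"a": {"i", "j"}, "b": {"i", "k"}}
_unscheduled = {"a", "b"}
_parallel, _active = {"k"}, {"i"}
_still_needed = (set().union(*(_insn_inames[i] for i in _unscheduled))
        - _parallel - _active)
assert _still_needed == {"j"}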
+ # TODO ignore data dependency for now + # {{{ data_dep_written = True for domain_par in ( iname_home_domain_params @@ -1155,130 +1356,168 @@ def generate_loop_schedules_internal( "parameter '%s' is not yet available" % (iname, domain_par)) break + # }}} if not data_dep_written: continue # }}} - # {{{ determine if that gets us closer to being able to schedule an insn + # so far, scheduling of iname is allowed/plausible + + # {{{ does entering iname get us closer to scheduling an insn? usefulness = None # highest insn priority enabled by iname + # suppose we were to activate this iname... + # would that get us closer to scheduling an insn? hypothetically_active_loops = active_inames_set | set([iname]) + # reachable_insn_ids = + # no active inames conflict w/insn, but may need more inames for insn_id in reachable_insn_ids: insn = kernel.id_to_insn[insn_id] - want = kernel.insn_inames(insn) | insn.boostable_into + inames_wanted_for_insn = ( + kernel.insn_inames(insn) | insn.boostable_into) - if hypothetically_active_loops <= want: + if hypothetically_active_loops <= inames_wanted_for_insn: if usefulness is None: usefulness = insn.priority else: usefulness = max(usefulness, insn.priority) if usefulness is None: + # {{{ iname won't get us closer to scheduling insn; debug msg, cont. if debug_mode: print("iname '%s' deemed not useful" % iname) continue + # }}} iname_to_usefulness[iname] = usefulness # }}} + # iname_to_usefulness.keys: inames that get us closer to scheduling an insn + # {{{ tier building - # Build priority tiers. If a schedule is found in the first tier, then - # loops in the second are not even tried (and so on). - loop_priority_set = set().union(*[set(prio) - for prio in - sched_state.kernel.loop_priority]) + # inames not yet entered that would get us closer to scheduling an insn: useful_loops_set = set(six.iterkeys(iname_to_usefulness)) - useful_and_desired = useful_loops_set & loop_priority_set + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"tier building"+"."*60) + print( + " "*_depth_ctr+"useful inames including ilp:", + useful_loops_set + ) - if useful_and_desired: - wanted = ( - useful_and_desired - - sched_state.ilp_inames - - sched_state.vec_inames + from loopy.transform.iname import ( + check_all_must_not_nests, + ) + from loopy.tools import ( + get_graph_sources, + ) + from pytools.graph import compute_induced_subgraph + + # since vec_inames must be innermost, + # they are not valid canidates unless only vec_inames remain + if useful_loops_set - sched_state.vec_inames: + useful_loops_set -= sched_state.vec_inames + + # to enter an iname without violating must_nest constraints, + # iname must be a source in the induced subgraph of must_nest_graph + # containing inames in useful_loops_set + must_nest_graph_full = ( + sched_state.kernel.loop_nest_constraints.must_nest_graph + if sched_state.kernel.loop_nest_constraints else None) + if must_nest_graph_full: + must_nest_graph_useful = compute_induced_subgraph( + must_nest_graph_full, + useful_loops_set ) - priority_tiers = [t for t in - get_priority_tiers(wanted, - sched_state.kernel.loop_priority - ) - ] - - # Update the loop priority set, because some constraints may have - # have been contradictary. 
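# Toy stand-ins (not loopy's implementations) for the two graph helpers
# this hunk relies on, pytools.graph.compute_induced_subgraph and
# loopy.tools.get_graph_sources, applied to a must-nest graph in
# adjacency-dict form (iname -> inames that must nest inside it). A
# valid next loop to open is a source of the subgraph induced on the
# useful inames, i.e. an iname that no other useful iname must contain:

def _induced_subgraph_sketch(graph, nodes):
    # keep only the given nodes, and only the edges among them
    return {n: graph.get(n, set()) & nodes for n in nodes}

def _graph_sources_sketch(graph):
    # nodes with no incoming edge
    non_sources = set().union(*graph.values()) if graph else set()
    return set(graph) - non_sources

_must_nest = {"i": {"j", "k"}, "j": {"k"}, "k": set()}
assert _graph_sources_sketch(
        _induced_subgraph_sketch(_must_nest, {"j", "k"})) == {"j"}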
- loop_priority_set = set().union(*[set(t) for t in priority_tiers]) - - priority_tiers.append( - useful_loops_set - - loop_priority_set - - sched_state.ilp_inames - - sched_state.vec_inames - ) + source_inames = get_graph_sources(must_nest_graph_useful) + else: + source_inames = useful_loops_set + + # since graph has a key for every iname, + # sources should be the only valid iname candidates + + # check whether entering any source_inames violates + # must-not-nest constraints + must_not_nest_constraints = ( + sched_state.kernel.loop_nest_constraints.must_not_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_not_nest_constraints: + next_iname_candidates = set() + for next_iname in source_inames: + iname_orders_to_check = [ + (active_iname, next_iname) + for active_iname in active_inames_set] + + if check_all_must_not_nests( + iname_orders_to_check, must_not_nest_constraints): + next_iname_candidates.add(next_iname) else: - priority_tiers = [ - useful_loops_set - - sched_state.ilp_inames - - sched_state.vec_inames - ] - - # vectorization must be the absolute innermost loop - priority_tiers.extend([ - [iname] - for iname in sched_state.ilp_inames - if iname in useful_loops_set - ]) - - priority_tiers.extend([ - [iname] - for iname in sched_state.vec_inames - if iname in useful_loops_set - ]) + next_iname_candidates = source_inames + + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"TIERS INIT ======================================") + _depth_ctr += 1 + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"sources:", next_iname_candidates) # }}} if debug_mode: print("useful inames: %s" % ",".join(useful_loops_set)) else: - for tier in priority_tiers: - found_viable_schedule = False - - for iname in sorted(tier, - key=lambda iname: ( - iname_to_usefulness.get(iname, 0), - # Sort by iname to achieve deterministic - # ordering of generated schedules. - iname), - reverse=True): - - for sub_sched in generate_loop_schedules_internal( - sched_state.copy( - schedule=( - sched_state.schedule - + (EnterLoop(iname=iname),)), - active_inames=( - sched_state.active_inames + (iname,)), - entered_inames=( - sched_state.entered_inames - | frozenset((iname,))), - preschedule=( - sched_state.preschedule - if iname not in sched_state.prescheduled_inames - else sched_state.preschedule[1:]), - ), - allow_boost=rec_allow_boost, - debug=debug): - found_viable_schedule = True - yield sub_sched - - if found_viable_schedule: - return + if _print_depth_ctr: # TODO remove + print(" "*_depth_ctr+"LOOP OVER CANDIDATES ------------------------") + print( + " "*_depth_ctr+"loop over these candidates:", + next_iname_candidates) + found_viable_schedule = False + + # loop over iname candidates; enter inames and recurse: + for iname in sorted(next_iname_candidates, + key=lambda iname: ( + iname_to_usefulness.get(iname, 0), + # Sort by iname to achieve deterministic + # ordering of generated schedules. 
+ iname), + reverse=True): + if _print_depth_ctr: # TODO remove + print(" "*(_depth_ctr+1)+"loop over iname candidates:", iname) + + # enter the loop and recurse + for sub_sched in generate_loop_schedules_internal( + sched_state.copy( + schedule=( + sched_state.schedule + + (EnterLoop(iname=iname),)), + active_inames=( + sched_state.active_inames + (iname,)), + entered_inames=( + sched_state.entered_inames + | frozenset((iname,))), + preschedule=( + sched_state.preschedule + if iname not in sched_state.prescheduled_inames + else sched_state.preschedule[1:]), + ), + allow_boost=rec_allow_boost, + debug=debug, + _depth_ctr=_depth_ctr): + + found_viable_schedule = True + yield sub_sched + # TODO what happened if found_viable_schedule is false? + if found_viable_schedule: + return + _depth_ctr -= 1 # }}} + # debug instructions for user {{{ if debug_mode: print(75*"=") inp = six.moves.input("Hit Enter for next schedule, " @@ -1286,14 +1525,37 @@ def generate_loop_schedules_internal( "different length:") if inp: raise ScheduleDebugInput(inp) + # }}} + + # make sure must_nest_constraints satisfied + # (the check above avoids contradicting some must_nest constraints, + # but we don't know if all required nestings are present) + # TODO is this the only place we need to check all must_nest constraints? + from loopy.transform.iname import ( + get_iname_nestings, + is_loop_nesting_valid, + ) + must_nest_constraints = (sched_state.kernel.loop_nest_constraints.must_nest + if sched_state.kernel.loop_nest_constraints else None) + if must_nest_constraints: + sched_tiers = get_iname_nestings(sched_state.schedule) + must_constraints_satisfied = is_loop_nesting_valid( + sched_tiers, must_nest_constraints, + must_not_nest_constraints=None, # (checked upon loop creation) + all_inames=kernel.all_inames()) + else: + must_constraints_satisfied = True if ( not sched_state.active_inames and not sched_state.unscheduled_insn_ids - and not sched_state.preschedule): + and not sched_state.preschedule + and must_constraints_satisfied): # if done, yield result debug.log_success(sched_state.schedule) + # TODO ignore boost for now + # {{{ for boost_insn_id, boost_inames in sched_state.uses_of_boostability: warn_with_kernel( kernel, "used_boostability", @@ -1302,6 +1564,7 @@ def generate_loop_schedules_internal( "This is deprecated and will stop working in loopy 2017.x." % (boost_insn_id, ", ".join(boost_inames)), DeprecationWarning) + # }}} yield sched_state.schedule @@ -1311,6 +1574,7 @@ def generate_loop_schedules_internal( for sub_sched in generate_loop_schedules_internal( sched_state, allow_boost=True, debug=debug): + # TODO check to make sure must_nest constraints satisfied? yield sub_sched else: # dead end @@ -1848,6 +2112,29 @@ def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) + # make sure legacy dependencies have become contemporary dependencies + # TODO move this to proper location and attach deps to individual stmts + # TODO update after dep refactoring + if any(insn.depends_on for insn in kernel.instructions): + warn_with_kernel( + kernel, "legacy_dependencies_found", + "Legacy dependencies found in kernel, creating " + "corresponding new dependencies before scheduling. 
" + "This may also be accomplished with the following script:\n\n" + "from loopy.schedule.checker import " + "create_dependencies_from_legacy_knl\n" + "deps = create_dependencies_from_legacy_knl(knl)\n" + "knl = lp.add_dependencies_v2(knl, deps)\n\n" + ) + from loopy.schedule.checker import ( + create_dependencies_from_legacy_knl, + ) + from loopy.transform.instruction import ( + add_dependencies_v2, + ) + deps = create_dependencies_from_legacy_knl(kernel) + kernel = add_dependencies_v2(kernel, deps) + schedule_count = 0 debug = ScheduleDebugger(**debug_args) @@ -1881,13 +2168,44 @@ def generate_loop_schedules_inner(kernel, debug_args={}): loop_nest_with_map = find_loop_nest_with_map(kernel) loop_nest_around_map = find_loop_nest_around_map(kernel) + + # {{{ create dependency graph with edges from depender* to dependee* + # iff intersection (SAME_map & DEP_map) is not empty + + from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, + ) + from loopy.schedule.checker.utils import ( + create_graph_from_pairs, + get_concurrent_inames, + ) + + _, non_conc_inames = get_concurrent_inames(kernel) + legacy_deps_filtered_by_same = filter_deps_by_intersection_with_SAME( + kernel, + kernel.dependencies, # TODO update after dependency refactoring + non_conc_inames, + ) + + # get dep graph edges with edges from depender->dependee + dep_graph_pairs = [ + (insn_id_before, insn_id_after) + for insn_id_before, insn_id_after, _ in legacy_deps_filtered_by_same] + + # create dep graph from edges + insn_depends_on_graph = create_graph_from_pairs(dep_graph_pairs) + + # }}} + sched_state = SchedulerState( kernel=kernel, loop_nest_around_map=loop_nest_around_map, loop_insn_dep_map=find_loop_insn_dep_map( kernel, loop_nest_with_map=loop_nest_with_map, - loop_nest_around_map=loop_nest_around_map), + loop_nest_around_map=loop_nest_around_map, + insn_depends_on_graph=insn_depends_on_graph), + insn_depends_on_graph=insn_depends_on_graph, breakable_inames=ilp_inames, ilp_inames=ilp_inames, vec_inames=vec_inames, @@ -2015,7 +2333,7 @@ schedule_cache = WriteOncePersistentDict( key_builder=LoopyKeyBuilder()) -def _get_one_scheduled_kernel_inner(kernel): +def _get_one_scheduled_kernel_inner(kernel, debug_args={}): # This helper function exists to ensure that the generator chain is fully # out of scope after the function returns. This allows it to be # garbage-collected in the exit handler of the @@ -2025,19 +2343,21 @@ def _get_one_scheduled_kernel_inner(kernel): # # See https://gitlab.tiker.net/inducer/sumpy/issues/31 for context. - return next(iter(generate_loop_schedules(kernel))) + return next(iter( + generate_loop_schedules(kernel, debug_args=debug_args))) -def get_one_scheduled_kernel(kernel): +def get_one_scheduled_kernel(kernel, debug_args={}): warn_with_kernel( kernel, "get_one_scheduled_kernel_deprecated", "get_one_scheduled_kernel is deprecated. 
" "Use get_one_linearized_kernel instead.", DeprecationWarning) - return get_one_linearized_kernel(kernel) + return get_one_linearized_kernel(kernel, debug_args) + +def get_one_linearized_kernel(kernel, debug_args={}): -def get_one_linearized_kernel(kernel): from loopy import CACHING_ENABLED sched_cache_key = kernel @@ -2055,7 +2375,8 @@ def get_one_linearized_kernel(kernel): if not from_cache: with ProcessLogger(logger, "%s: schedule" % kernel.name): with MinRecursionLimitForScheduling(kernel): - result = _get_one_scheduled_kernel_inner(kernel) + result = _get_one_scheduled_kernel_inner( + kernel, debug_args=debug_args) if CACHING_ENABLED and not from_cache: schedule_cache.store_if_not_present(sched_cache_key, result) diff --git a/loopy/schedule/checker/__init__.py b/loopy/schedule/checker/__init__.py index 716a0cb58cc4e6ecddbbc3231583d9ddc2a9ef5a..aa1e671616013ed678b5b3adc33e22829466de5f 100644 --- a/loopy/schedule/checker/__init__.py +++ b/loopy/schedule/checker/__init__.py @@ -148,3 +148,231 @@ def get_schedule_for_statement_pair( # }}} # }}} + + +def create_dependencies_from_legacy_knl(knl): + """Return a list of + :class:`loopy.schedule.checker.dependency.TBD` + instances created for a :class:`loopy.LoopKernel` containing legacy + depencencies. + + Create the new dependencies according to the following rules: + + (1) If a dependency exists between ``insn0`` and ``insn1``, create the + dependnecy ``SAME(SNC)`` where ``SNC`` is the set of non-concurrent inames + used by both ``insn0`` and ``insn1``, and ``SAME`` is the relationship + specified by the ``SAME`` attribute of + :class:`loopy.schedule.checker.dependency.DependencyType`. + + (2) For each subset of non-concurrent inames used by any instruction, + + (a), find the set of all instructions using those inames, + + (b), create a directed graph with these instructions as nodes and + edges representing a 'happens before' relationship specfied by + each dependency, + + (c), find the sources and sinks within this graph, and + + (d), connect each sink to each source (sink happens before source) + with a ``PRIOR(SNC)`` dependency, where ``PRIOR`` is the + relationship specified by the ``PRIOR`` attribute of + :class:`loopy.schedule.checker.dependency.DependencyType`. 
+ + """ + + from loopy.schedule.checker.dependency import ( + create_dependency_constraint, + get_dependency_sources_and_sinks, + StatementPairDependencySet, + DependencyType as dt, + ) + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + get_all_nonconcurrent_insn_iname_subsets, + get_linearization_item_ids_within_inames, + ) + from loopy.schedule.checker.schedule import StatementRef + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + # Create StatementPairDependencySet(s) from kernel dependencies + spds = set() + + # Introduce SAME dep for set of shared, non-concurrent inames + + conc_inames, non_conc_inames = get_concurrent_inames(preprocessed_knl) + for insn_after in preprocessed_knl.instructions: + for insn_before_id in insn_after.depends_on: + insn_before = preprocessed_knl.id_to_insn[insn_before_id] + insn_before_inames = insn_before.within_inames + insn_after_inames = insn_after.within_inames + shared_inames = insn_before_inames & insn_after_inames + shared_non_conc_inames = shared_inames & non_conc_inames + + spds.add( + StatementPairDependencySet( + StatementRef(insn_id=insn_before.id), + StatementRef(insn_id=insn_after.id), + {dt.SAME: shared_non_conc_inames}, + preprocessed_knl.get_inames_domain(insn_before_inames), + preprocessed_knl.get_inames_domain(insn_after_inames), + )) + + # loop-carried deps ------------------------------------------ + + # Go through insns and get all unique insn.depends_on iname sets + non_conc_iname_subsets = get_all_nonconcurrent_insn_iname_subsets( + preprocessed_knl, exclude_empty=True, non_conc_inames=non_conc_inames) + + # For each set of insns within a given iname set, find sources and sinks. 
+ # Then make PRIOR dep from all sinks to all sources at previous iterations + for iname_subset in non_conc_iname_subsets: + # find items within this iname set + linearization_item_ids = get_linearization_item_ids_within_inames( + preprocessed_knl, iname_subset) + + # find sources and sinks + sources, sinks = get_dependency_sources_and_sinks( + preprocessed_knl, linearization_item_ids) + + # create prior deps + + # in future, consider inserting single no-op source and sink + for source_id in sources: + for sink_id in sinks: + sink_insn_inames = preprocessed_knl.id_to_insn[sink_id].within_inames + source_insn_inames = preprocessed_knl.id_to_insn[source_id].within_inames + shared_inames = sink_insn_inames & source_insn_inames + shared_non_conc_inames = shared_inames & non_conc_inames + + spds.add( + StatementPairDependencySet( + StatementRef(insn_id=sink_id), + StatementRef(insn_id=source_id), + {dt.PRIOR: shared_non_conc_inames}, + preprocessed_knl.get_inames_domain(sink_insn_inames), + preprocessed_knl.get_inames_domain(source_insn_inames), + )) + + dep_maps = set() + for statement_pair_dep_set in spds: + # create a map representing constraints from the dependency, + # which maps statement instance to all stmt instances that must occur later + # and is acquired from the non-preprocessed kernel + constraint_map = create_dependency_constraint( + statement_pair_dep_set, + knl.loop_priority, + ) + + dep_maps.add(( + statement_pair_dep_set.statement_before.insn_id, + statement_pair_dep_set.statement_after.insn_id, + constraint_map, + )) + + return frozenset(dep_maps) + + +def check_linearization_validity( + knl, + dep_maps, + linearization_items, + ): + # TODO document + + from loopy.schedule.checker.dependency import ( + create_dependency_constraint, + ) + from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, + ) + from loopy.schedule.checker.utils import ( + prettier_map_string, + ) + + # Preprocess if not already preprocessed + # note: kernels must always be preprocessed before scheduling + from loopy import preprocess_kernel + preprocessed_knl = preprocess_kernel(knl) + + # For each dependency, create+test linearization containing pair of insns------ + linearization_is_valid = True + #for statement_pair_dep_set in statement_pair_dep_sets: + for insn_id_before, insn_id_after, constraint_map in dep_maps: + # TODO, since we now get the doms inside + # build_maps() + # reconsider the content of statement_pair_dep_set, which + # currently contains doms(do we still want them there?) 
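# To make the "constraint map" being checked here concrete, a minimal
# runnable sketch (assuming islpy, which loopy already uses; primed
# variables are spelled with a "p" suffix to keep isl's parser happy):
# a SAME(i) dependency between statements 0 and 1 maps each instance of
# the before-statement to the after-statement instances that must come
# later.
import islpy as isl
_same_i = isl.Map(
    "{ [sp, ip] -> [s, i] : sp = 0 and s = 1 and ip = i and 0 <= i < 8 }")
# instance (stmt 0, i=3) must precede instance (stmt 1, i=3):
assert not _same_i.intersect(isl.Map("{ [0, 3] -> [1, 3] }")).is_empty()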
+ + # Create PairwiseScheduleBuilder: mapping of {statement instance: lex point} + # include only instructions involved in this dependency + sched_builder = get_schedule_for_statement_pair( + preprocessed_knl, + linearization_items, + insn_id_before, + insn_id_after, + ) + + # Get two isl maps from the PairwiseScheduleBuilder, + # one for each linearization item involved in the dependency; + isl_sched_map_before, isl_sched_map_after = sched_builder.build_maps( + preprocessed_knl) + + # get map representing lexicographic ordering + sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space() + + # create statement instance ordering, + # maps each statement instance to all statement instances occuring later + sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + ) + + # reorder variables/params in constraint map space to match SIO so we can + # check to see whether the constraint map is a subset of the SIO + # (spaces must be aligned so that the variables in the constraint map + # correspond to the same variables in the SIO) + from loopy.schedule.checker.utils import ( + ensure_dim_names_match_and_align, + ) + + aligned_constraint_map = ensure_dim_names_match_and_align( + constraint_map, sio) + + import islpy as isl + assert aligned_constraint_map.space == sio.space + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) + assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + + if not aligned_constraint_map.is_subset(sio): + + linearization_is_valid = False + + print("================ constraint check failure =================") + print("Constraint map not subset of SIO") + print("Dependencies:") + print(insn_id_before+"->"+insn_id_after) + print(prettier_map_string(constraint_map)) + print("Statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(prettier_map_string(aligned_constraint_map.gist(sio))) + print("sio.gist(constraint_map)") + print(prettier_map_string(sio.gist(aligned_constraint_map))) + print("Loop priority known:") + print(preprocessed_knl.loop_priority) + print("===========================================================") + + return linearization_is_valid diff --git a/loopy/schedule/checker/dependency.py b/loopy/schedule/checker/dependency.py new file mode 100644 index 0000000000000000000000000000000000000000..4d22990579f49f7c62b8bb1b6751fa5fbe4ec914 --- /dev/null +++ b/loopy/schedule/checker/dependency.py @@ -0,0 +1,707 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +class DependencyType: + """Strings specifying a particular type of dependency relationship. + + .. attribute:: SAME + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff SAME({i, j})`` specifies that + ``insn0 happens before insn1 iff {i' = i and j' = j and ...}``. + Note that ``SAME({}) = True``. + + .. attribute:: PRIOR + + A :class:`str` specifying the following dependency relationship: + + If ``S = {i, j, k, ...}`` is a set of inames used in both statements + ``insn0`` and ``insn1``, and ``{i', j', k', ...}`` represent the values + of the inames in ``insn0``, and ``{i, j, k, ...}`` represent the + values of the inames in ``insn1``, then the dependency + ``insn0 happens before insn1 iff PRIOR({i, j, k})`` specifies one of + two possibilities, depending on whether the loop nest ordering is + known. If the loop nest ordering is unknown, then + ``insn0 happens before insn1 iff {i' < i and j' < j and k' < k ...}``. + If the loop nest ordering is known, the condition becomes + ``{i', j', k', ...}`` is lexicographically less than ``{i, j, k, ...}``, + i.e., ``i' < i or (i' = i and j' < j) or (i' = i and j' = j and k' < k) ...``. + + """ + + SAME = "same" + PRIOR = "prior" + + +class StatementPairDependencySet(object): + """A set of dependencies between two statements. + + .. attribute:: statement_before + + A :class:`loopy.schedule.checker.schedule.StatementRef` depended + on by statement_after. + + .. attribute:: statement_after + + A :class:`loopy.schedule.checker.schedule.StatementRef` which + cdepends on statement_before. + + .. attribute:: deps + + A :class:`dict` mapping instances of :class:`DependencyType` to + the :mod:`loopy` kernel inames involved in that particular + dependency relationship. + + .. attribute:: dom_before + + A :class:`islpy.BasicSet` representing the domain for the + dependee statement. + + .. attribute:: dom_after + + A :class:`islpy.BasicSet` representing the domain for the + depender statement. 
+ + """ + + def __init__( + self, + statement_before, + statement_after, + deps, # {dep_type: iname_set} + dom_before=None, + dom_after=None, + ): + self.statement_before = statement_before + self.statement_after = statement_after + self.deps = deps + self.dom_before = dom_before + self.dom_after = dom_after + + def __eq__(self, other): + return ( + self.statement_before == other.statement_before + and self.statement_after == other.statement_after + and self.deps == other.deps + and self.dom_before == other.dom_before + and self.dom_after == other.dom_after + ) + + def __lt__(self, other): + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.statement_before) + key_builder.rec(key_hash, self.statement_after) + key_builder.rec(key_hash, self.deps) + key_builder.rec(key_hash, self.dom_before) + key_builder.rec(key_hash, self.dom_after) + + def __str__(self): + result = "%s --before->\n%s iff\n " % ( + self.statement_before, self.statement_after) + return result + " and\n ".join( + ["(%s : %s)" % (dep_type, inames) + for dep_type, inames in self.deps.items()]) + + +def create_elementwise_comparison_conjunction_set( + names0, names1, islvars, op="eq"): + """Create a set constrained by the conjunction of conditions comparing + `names0` to `names1`. + + :arg names0: A list of :class:`str` representing variable names. + + :arg names1: A list of :class:`str` representing variable names. + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + + :arg op: A :class:`str` describing the operator to use when creating + the set constraints. Options: `eq` for `=`, `lt` for `<` + + :returns: A set involving `islvars` cosntrained by the constraints + `{names0[0] names1[0] and names0[1] names1[1] and ...}`. + + """ + + # initialize set with constraint that is always true + conj_set = islvars[0].eq_set(islvars[0]) + for n0, n1 in zip(names0, names1): + if op == "eq": + conj_set = conj_set & islvars[n0].eq_set(islvars[n1]) + elif op == "lt": + conj_set = conj_set & islvars[n0].lt_set(islvars[n1]) + + return conj_set + + +def _convert_constraint_set_to_map(constraint_set, mv_count, src_position=None): + dim_type = isl.dim_type + constraint_map = isl.Map.from_domain(constraint_set) + if src_position: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, src_position, mv_count) + else: + return constraint_map.move_dims( + dim_type.out, 0, dim_type.in_, mv_count, mv_count) + + +def create_dependency_constraint( + statement_dep_set, + loop_priorities, + ): + """Create a statement dependency constraint represented as a map from + each statement instance to statement instances that must occur later, + i.e., ``{[s'=0, i', j'] -> [s=1, i, j] : condition on {i', j', i, j}}`` + indicates that statement ``0`` comes before statment ``1`` when the + specified condition on inames ``i',j',i,j`` is met. ``i'`` and ``j'`` + are the values of inames ``i`` and ``j`` in first statement instance. + + :arg statement_dep_set: A :class:`StatementPairDependencySet` describing + the dependency relationship between the two statements. 
+ + :arg loop_priorities: A list of tuples from the ``loop_priority`` + attribute of :class:`loopy.LoopKernel` specifying the loop nest + ordering rules. + + :returns: An :class:`islpy.Map` mapping each statement instance to all + statement instances that must occur later according to the constraints. + + """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + list_var_names_in_isl_sets, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + dom_inames_ordered_before = list_var_names_in_isl_sets( + [statement_dep_set.dom_before]) + dom_inames_ordered_after = list_var_names_in_isl_sets( + [statement_dep_set.dom_after]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+dom_inames_ordered_before, + other_var_names=[STATEMENT_VAR_NAME]+dom_inames_ordered_after, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + # for each (dep_type, inames) pair, create 'happens before' constraint, + # all_constraints_set will be the union of all these constraints + dt = DependencyType + for dep_type, inames in statement_dep_set.deps.items(): + # need to put inames in a list so that order of inames and inames' + # matches when calling create_elementwise_comparison_conj... + if not isinstance(inames, list): + inames_list = list(inames) + else: + inames_list = inames[:] + inames_prime = append_apostrophes(inames_list) # e.g., [j', k'] + + if dep_type == dt.SAME: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="eq") + elif dep_type == dt.PRIOR: + + priority_known = False + # if nesting info is provided: + if loop_priorities: + # assumes all loop_priority tuples are consistent + + # with multiple priority tuples, determine whether the combined + # info they contain can give us a single, full proiritization, + # e.g., if prios={(a, b), (b, c), (c, d, e)}, then we know + # a -> b -> c -> d -> e + + # remove irrelevant inames from priority tuples (because we're + # about to perform a costly operation on remaining tuples) + relevant_priorities = set() + for p_tuple in loop_priorities: + new_tuple = [iname for iname in p_tuple if iname in inames_list] + # empty tuples and single tuples don't help us define + # a nesting, so ignore them (if we're dealing with a single + # iname, priorities will be ignored later anyway) + if len(new_tuple) > 1: + relevant_priorities.add(tuple(new_tuple)) + + # create a mapping from each iname to inames that must be + # nested inside that iname + nested_inside = {} + for outside_iname in inames_list: + nested_inside_inames = set() + for p_tuple in relevant_priorities: + if outside_iname in p_tuple: + nested_inside_inames.update([ + inside_iname for inside_iname in + p_tuple[p_tuple.index(outside_iname)+1:]]) + nested_inside[outside_iname] = nested_inside_inames + + from loopy.schedule.checker.utils import ( + get_orderings_of_length_n) + # get all orderings that are explicitly allowed by priorities + orders = get_orderings_of_length_n( + 
nested_inside, + required_length=len(inames_list), + #return_first_found=True, + return_first_found=False, # slower; allows priorities test below + ) + + if orders: + # test for invalid priorities (includes cycles) + if len(orders) != 1: + raise ValueError( + "create_dependency_constriant encountered invalid " + "priorities %s" + % (loop_priorities)) + priority_known = True + priority_tuple = orders.pop() + + # if only one loop, we know the priority + if not priority_known and len(inames_list) == 1: + priority_tuple = tuple(inames_list) + priority_known = True + + if priority_known: + # PRIOR requires statement_before complete previous iterations + # of loops before statement_after completes current iteration + # according to loop nest order + inames_list_nest_ordered = [ + iname for iname in priority_tuple + if iname in inames_list] + inames_list_nest_ordered_prime = append_apostrophes( + inames_list_nest_ordered) + if set(inames_list_nest_ordered) != set(inames_list): + # TODO could this happen? + assert False + + from loopy.schedule.checker import ( + lexicographic_order_map as lom) + # TODO handle case where inames list is empty + constraint_set = lom.get_lex_order_constraint( + inames_list_nest_ordered_prime, + inames_list_nest_ordered, + islvars, + ) + else: # priority not known + # PRIOR requires upper left quadrant happen before: + constraint_set = create_elementwise_comparison_conjunction_set( + inames_prime, inames_list, islvars, op="lt") + + # get ints representing statements in PairwiseSchedule + s_before_int = 0 + s_after_int = 0 if ( + statement_dep_set.statement_before.insn_id == + statement_dep_set.statement_after.insn_id + ) else 1 + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+s_before_int) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+s_after_int) + + # union this constraint_set with all_constraints_set + all_constraints_set = all_constraints_set | constraint_set + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, + mv_count=len(dom_inames_ordered_after)+1, # +1 for statement var + src_position=len(dom_inames_ordered_before)+1, # +1 for statement var + ) + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + # (anything other than 0 risks being out of bounds) + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + statement_dep_set.dom_after, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + statement_dep_set.dom_before, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + dom_inames_ordered_before)) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + dom_inames_ordered_after) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +# TODO no longer used, 
move elsewhere +def _create_5pt_stencil_dependency_constraint( + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + space_iname, + time_iname, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + append_apostrophes, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the dependency given to create the following constraint: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = list_var_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="'", + ) + statement_var_name_prime = STATEMENT_VAR_NAME+"'" + + # initialize constraints to False + # this will disappear as soon as we add a constraint + #all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + + space_iname_prime = space_iname + "'" + time_iname_prime = time_iname + "'" + one = islvars[0] + 1 + two = islvars[0] + 2 + # global: + """ + constraint_set = ( + islvars[time_iname_prime].gt_set(islvars[time_iname]) & + ( + (islvars[space_iname_prime]-two).lt_set(islvars[space_iname]) & + islvars[space_iname].lt_set(islvars[space_iname_prime]+two) + ) + | + islvars[time_iname_prime].gt_set(islvars[time_iname] + one) & + islvars[space_iname].eq_set(islvars[space_iname_prime]) + ) + """ + # local dep: + constraint_set = ( + islvars[time_iname].eq_set(islvars[time_iname_prime] + one) & ( + (islvars[space_iname]-two).lt_set(islvars[space_iname_prime]) & + islvars[space_iname_prime].lt_set(islvars[space_iname]+two)) + | + (islvars[time_iname].eq_set(islvars[time_iname_prime] + two) + & islvars[space_iname_prime].eq_set(islvars[space_iname])) + ) + + # set statement_var_name == statement # + constraint_set = constraint_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + constraint_set = constraint_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + constraint_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="'") + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_apostrophes([STATEMENT_VAR_NAME] + all_dom_inames_ordered)) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + 
isl.dim_type.out, + [STATEMENT_VAR_NAME] + all_dom_inames_ordered) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +def create_arbitrary_dependency_constraint( + constraint_str, + dom_before_constraint_set, + dom_after_constraint_set, + sid_before, + sid_after, + all_dom_inames_ordered=None, # TODO eliminate need for this arg + ): + """ WIP: NO NEED TO REVIEW YET """ + + # TODO test after switching primes to before vars + + from loopy.schedule.checker.utils import ( + make_islvars_with_marker, + #append_apostrophes, + append_marker_to_strings, + add_dims_to_isl_set, + insert_missing_dims_and_reorder_by_name, + append_marker_to_isl_map_var_names, + ) + from loopy.schedule.checker.schedule import STATEMENT_VAR_NAME + # This function uses the constraint given to create the following map: + # Statement [s,i,j] comes before statement [s',i',j'] iff + + from loopy.schedule.checker.utils import ( + list_var_names_in_isl_sets, + ) + if all_dom_inames_ordered is None: + all_dom_inames_ordered = list_var_names_in_isl_sets( + [dom_before_constraint_set, dom_after_constraint_set]) + + # create some (ordered) isl vars to use, e.g., {s, i, j, s', i', j'} + islvars = make_islvars_with_marker( + var_names_needing_marker=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + other_var_names=[STATEMENT_VAR_NAME]+all_dom_inames_ordered, + marker="p", + ) # TODO figure out before/after notation + #statement_var_name_prime = STATEMENT_VAR_NAME+"'" + statement_var_name_prime = STATEMENT_VAR_NAME+"p" + # TODO figure out before/after notation + + # initialize constraints to False + # this will disappear as soon as we add a constraint + all_constraints_set = islvars[0].eq_set(islvars[0] + 1) + space = all_constraints_set.space + from pymbolic import parse + from loopy.symbolic import aff_from_expr + + or_constraint_strs = constraint_str.split("or") + + def _quant(s): + return "(" + s + ")" + + def _diff(s0, s1): + return _quant(s0) + "-" + _quant(s1) + + for or_constraint_str in or_constraint_strs: + and_constraint_strs = or_constraint_str.split("and") + #conj_constraint = islvars[0].eq_set(islvars[0]) # init to true + conj_constraint = isl.BasicSet.universe(space) + for cons_str in and_constraint_strs: + if "<=" in cons_str: + lhs, rhs = cons_str.split("<=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs))))) + # TODO something more robust than this string meddling^ + elif ">=" in cons_str: + lhs, rhs = cons_str.split(">=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + elif "<" in cons_str: + lhs, rhs = cons_str.split("<") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(rhs, lhs) + "- 1")))) + elif ">" in cons_str: + lhs, rhs = cons_str.split(">") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.inequality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs) + "- 1")))) + elif "=" in cons_str: + lhs, rhs = cons_str.split("=") + conj_constraint = conj_constraint.add_constraint( + isl.Constraint.equality_from_aff( + aff_from_expr(space, parse(_diff(lhs, rhs))))) + else: + 1/0 + all_constraints_set = all_constraints_set | conj_constraint + + # set statement_var_name == statement # + 
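+    # (Illustrative sketch of the parsing above, using hypothetical input:
+    # a clause like "it >= itp + 1" in constraint_str becomes
+    # inequality_from_aff(aff_from_expr(space, (it) - (itp + 1))),
+    # i.e., it - itp - 1 >= 0, and each "or"-separated clause is unioned
+    # into all_constraints_set as one such conjunction. The two eq_set
+    # conjuncts below then pin the statement dims to sid_before/sid_after.)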
all_constraints_set = ( + all_constraints_set & islvars[statement_var_name_prime].eq_set( + islvars[0]+sid_before) + ) + all_constraints_set = ( + all_constraints_set & islvars[STATEMENT_VAR_NAME].eq_set( + islvars[0]+sid_after) + ) + + # convert constraint set to map + all_constraints_map = _convert_constraint_set_to_map( + all_constraints_set, len(all_dom_inames_ordered) + 1) # +1 for statement var + + # now apply domain sets to constraint variables + statement_var_idx = 0 # index of statement_var dimension in map + + # add statement variable to doms to enable intersection + range_to_intersect = add_dims_to_isl_set( + dom_after_constraint_set, isl.dim_type.out, + [STATEMENT_VAR_NAME], statement_var_idx) + domain_constraint_set = append_marker_to_isl_map_var_names( + dom_before_constraint_set, isl.dim_type.set, marker="p") + # TODO figure out before/after notation + domain_to_intersect = add_dims_to_isl_set( + domain_constraint_set, isl.dim_type.out, + [statement_var_name_prime], statement_var_idx) + + # insert inames missing from doms to enable intersection + domain_to_intersect = insert_missing_dims_and_reorder_by_name( + domain_to_intersect, isl.dim_type.out, + append_marker_to_strings( # TODO figure out before/after notation + [STATEMENT_VAR_NAME] + all_dom_inames_ordered, "p")) + range_to_intersect = insert_missing_dims_and_reorder_by_name( + range_to_intersect, + isl.dim_type.out, + [STATEMENT_VAR_NAME] + all_dom_inames_ordered) + + # intersect doms + map_with_loop_domain_constraints = all_constraints_map.intersect_domain( + domain_to_intersect).intersect_range(range_to_intersect) + + return map_with_loop_domain_constraints + + +def get_dependency_sources_and_sinks(knl, linearization_item_ids): + """Implicitly create a directed graph with the linearization items specified + by ``linearization_item_ids`` as nodes, and with edges representing a + 'happens before' relationship specified by each legacy dependency between + two instructions. Return the sources and sinks within this graph. + + :arg linearization_item_ids: A :class:`set` of :class:`str` representing + loopy instruction ids. + + :returns: Two instances of :class:`set` of :class:`str` instruction ids + representing the sources and sinks in the dependency graph.
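+
+    Example (an illustrative sketch, not from the source): if ``insn_b``
+    and ``insn_c`` each depend on ``insn_a``, and ``linearization_item_ids
+    = {"insn_a", "insn_b", "insn_c"}``, the sources are ``{"insn_a"}`` and
+    the sinks are ``{"insn_b", "insn_c"}``.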
+ + """ + sources = set() + dependees = set() # all dependees (within linearization_item_ids) + for item_id in linearization_item_ids: + # find the deps within linearization_item_ids + deps = knl.id_to_insn[item_id].depends_on & linearization_item_ids + if deps: + # add deps to dependees + dependees.update(deps) + else: # has no deps (within linearization_item_ids), this is a source + sources.add(item_id) + + # sinks don't point to anyone + sinks = linearization_item_ids - dependees + + return sources, sinks + + +def filter_deps_by_intersection_with_SAME( + knl, + deps, + non_conc_inames, + ): + # TODO document + from loopy.schedule.checker.schedule import StatementRef + + dt = DependencyType + + # determine which dep relations have a non-empty intersection with + # the SAME relation + deps_filtered = [] + for insn_id_before, insn_id_after, dep_constraint_map in deps: + + # create isl map representing "SAME" dep for these two insns + shared_nc_inames = ( + knl.id_to_insn[insn_id_before].within_inames & + knl.id_to_insn[insn_id_after].within_inames & + non_conc_inames) + + same_dep_set = StatementPairDependencySet( + StatementRef(insn_id=insn_id_before), + StatementRef(insn_id=insn_id_after), + {dt.SAME: shared_nc_inames}, + knl.get_inames_domain(knl.id_to_insn[insn_id_before].within_inames), + knl.get_inames_domain(knl.id_to_insn[insn_id_after].within_inames), + ) + + same_dep_constraint_map = create_dependency_constraint( + same_dep_set, + knl.loop_priority, # TODO use new must_nest + ) + + # see whether the intersection of dep map and SAME dep map exists + intersect_dep_and_same = same_dep_constraint_map & dep_constraint_map + intersect_not_empty = not bool(intersect_dep_and_same.is_empty()) + + if intersect_not_empty: + deps_filtered.append((insn_id_before, insn_id_after, dep_constraint_map)) + + return deps_filtered diff --git a/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py new file mode 100644 index 0000000000000000000000000000000000000000..717dde1416ff91922edb5a77ac237382b5e6a4de --- /dev/null +++ b/loopy/schedule/checker/experimental_scripts/example_pairwise_schedule_validity.py @@ -0,0 +1,368 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +import numpy as np +from loopy.schedule.checker.utils import ( + create_graph_from_pairs, +) +from loopy.schedule.checker.dependency import ( + filter_deps_by_intersection_with_SAME, +) +from loopy import ( + preprocess_kernel, + get_one_linearized_kernel, +) + +# Choose kernel ---------------------------------------------------------- + +knl_choice = "example" +#knl_choice = "unused_inames" +#knl_choice = "matmul" +#knl_choice = "scan" +#knl_choice = "dependent_domain" +#knl_choice = "stroud_bernstein_orig" # TODO invalid sched? +#knl_choice = "ilp_kernel" +#knl_choice = "add_barrier" +#knl_choice = "nop" +#knl_choice = "nest_multi_dom" +#knl_choice = "loop_carried_deps" + +if knl_choice == "example": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +if knl_choice == "unused_inames": + knl = lp.make_kernel( + [ + "{[i,ii]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + end + end + """, + name="unused_inames", + assumptions="pi,pj,pk >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32}) + #knl = lp.tag_inames(knl, {"i": "l.0"}) + #knl = lp.prioritize_loops(knl, "i,k,j") + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") +elif knl_choice == "matmul": + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + # Must declare coeffs to have "no" shape, to keep loopy + # from trying to figure it out the shape automatically. +elif knl_choice == "ilp_kernel": + knl = lp.make_kernel( + "{[i,j,ilp_iname]: 0 <= i,j < n and 0 <= ilp_iname < 4}", + """ + for i + for j + for ilp_iname + tmp[i,j,ilp_iname] = 3.14 + end + end + end + """, + name="ilp_kernel", + assumptions="n>=1 and n mod 4 = 0", + ) + # TODO why is conditional on ilp_name? 
+ knl = lp.tag_inames(knl, {"j": "l.0", "ilp_iname": "ilp"}) + #knl = lp.prioritize_loops(knl, "i_outer_outer,i_outer_inner,i_inner,a") +if knl_choice == "add_barrier": + np.random.seed(17) + #a = np.random.randn(16) + cnst = np.random.randn(16) + knl = lp.make_kernel( + "{[i, ii]: 0<=i, ii c_end = 2 + for c + ... nop + end + end + """, + "...", + seq_dependencies=True) + knl = lp.fix_parameters(knl, dim=3) +if knl_choice == "nest_multi_dom": + #"{[i,j,k]: 0<=i,j,kacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + #assumptions="n >= 1", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + """ + <>foo = 0 {id=insn0} + for i + <>acc = 0 {id=insn1} + for j + for k + acc = acc + j + k {id=insn2,dep=insn1} + end + end + foo = foo + acc {id=insn3,dep=insn2} + end + <>bar = foo {id=insn4,dep=insn3} + """ + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + +if knl_choice == "loop_carried_deps": + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + +unprocessed_knl = knl.copy() + +deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + +# get a linearization to check +knl = preprocess_kernel(knl) +knl = get_one_linearized_kernel(knl) +print("kernel schedueld") +linearization_items = knl.linearization +print("checking validity") + +linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items, + ) + +""" +legacy_statement_pair_dep_sets = lp.statement_pair_dep_sets_from_legacy_knl(knl) + +# get a linearization to check +from loopy import get_one_linearized_kernel +linearized_knl = get_one_linearized_kernel(knl) +linearization_items = linearized_knl.linearization + +linearization_is_valid = lp.check_linearization_validity( + knl, legacy_statement_pair_dep_sets, linearization_items) +""" + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +print("="*80) +print("testing dep sort") +print("="*80) + +# create dependency graph + +# for which deps does the intersection with the SAME dependency relation exist? 
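+# (Sketch: a legacy dep is kept below iff its relation intersects the SAME
+# relation, i.e., iff it orders statement instances within a shared iteration
+# of their common sequential inames. For hypothetical edges, note that
+# create_graph_from_pairs([("a", "b"), ("a", "c")]) yields {"a": {"b", "c"}};
+# only "before" ids become keys.)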
+# create a graph including these deps as edges (from after->before) + +from loopy.schedule.checker.utils import ( + get_concurrent_inames, +) +_, non_conc_inames = get_concurrent_inames(knl) +legacy_deps_filtered_by_same = filter_deps_by_intersection_with_SAME( + knl, + deps, + non_conc_inames, + ) + +# get dep graph edges +dep_graph_pairs = [ + (insn_id_before, insn_id_after) + for insn_id_before, insn_id_after, _ in legacy_deps_filtered_by_same] + +# create dep graph from edges +dep_graph = create_graph_from_pairs(dep_graph_pairs) + +print("dep_graph:") +for k, v in dep_graph.items(): + print("%s: %s" % (k, v)) diff --git a/loopy/schedule/checker/experimental_scripts/example_wave_equation.py b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2da94e58c7ba821a9ba05d429ac562d108d774 --- /dev/null +++ b/loopy/schedule/checker/experimental_scripts/example_wave_equation.py @@ -0,0 +1,623 @@ +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +""" WIP: NO NEED TO REVIEW YET """ +import loopy as lp +from loopy import generate_code_v2 +from loopy import get_one_linearized_kernel +from loopy import preprocess_kernel +import numpy as np +import islpy as isl +#from loopy.kernel_stat_collector import KernelStatCollector +#from loopy.kernel_stat_collector import KernelStatOptions as kso # noqa +from loopy.schedule.checker.utils import ( + prettier_map_string, + ensure_dim_names_match_and_align, + append_marker_to_isl_map_var_names, + get_concurrent_inames, +) +from loopy.schedule.checker.dependency import ( + create_arbitrary_dependency_constraint, +) +from loopy.schedule.checker.schedule import PairwiseScheduleBuilder +from loopy.schedule.checker.lexicographic_order_map import ( + get_statement_ordering_map, +) + +# Make kernel ---------------------------------------------------------- + +# u[x,t+1] = 2*u[x,t] - u[x,t-1] + c*(dt/dx)**2*(u[x+1,t] - 2*u[x,t] + u[x-1,t]) +# mine, works: +# "{[x,t]: 1<=x {[ix, it]: 1<=ix {[ix, it]: 1<=ix lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="p") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + ) + conc_inames, non_conc_inames = get_concurrent_inames(linearized_knl) + print("concurrent inames:", 
conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + + +# ====================================================================== +# now do this with complicated mapping + + +# create mapping: +# old (wrong) +""" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx, tt, tparity, itt, itx]: " + "16*(tx - tt + tparity) + itx - itt = ix - it and " + "16*(tx + tt) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[statement, ix, it] -> " + "[statement'=statement, tx', tt', tparity', itt', itx']: " + "16*(tx' - tt' + tparity') + itx' - itt' = ix - it and " + "16*(tx' + tt') + itt' + itx' = ix + it and " + "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +""" + +# new +# TODO remove "unused" +m = isl.BasicMap( + "[nx,nt] -> {[ix, it] -> [tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +m2 = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_statement, ix, it] -> " + "[_lp_linchk_statement'=_lp_linchk_statement, tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix - it and " + "16*(tx + tt + tparity) + itt + itx = ix + it and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") +#m2_primes_after = isl.BasicMap( +# "[nx,nt,unused] -> {[statement, ix, it] -> " +# "[statement'=statement, tx', tt', tparity', itt', itx']: " +# "16*(tx' - tt') + itx' - itt' = ix - it and " +# "16*(tx' + tt' + tparity') + itt' + itx' = ix + it and " +# "0<=tparity'<2 and 0 <= itx' - itt' < 16 and 0 <= itt'+itx' < 16}") +m2_prime = isl.BasicMap( + "[nx,nt,unused] -> {[_lp_linchk_statement', ix', it'] -> " + "[_lp_linchk_statement=_lp_linchk_statement', tx, tt, tparity, itt, itx]: " + "16*(tx - tt) + itx - itt = ix' - it' and " + "16*(tx + tt + tparity) + itt + itx = ix' + it' and " + "0<=tparity<2 and 0 <= itx - itt < 16 and 0 <= itt+itx < 16}") + +# TODO note order must match statement_iname_premap_order + +print("maping:") +print(prettier_map_string(m2)) + +# new kernel +knl = lp.map_domain(ref_knl, m) +knl = lp.prioritize_loops(knl, "tt,tparity,tx,itt,itx") +print("code after mapping:") +print(generate_code_v2(knl).device_code()) +#1/0 + +print("constraint_map before apply_range:") +print(prettier_map_string(constraint_map)) +#mapped_constraint_map = constraint_map.apply_range(m2_prime) +mapped_constraint_map = constraint_map.apply_range(m2) +print("constraint_map after apply_range:") +print(prettier_map_string(mapped_constraint_map)) +#mapped_constraint_map = 
mapped_constraint_map.apply_domain(m2) +mapped_constraint_map = mapped_constraint_map.apply_domain(m2_prime) +# put primes on *before* names +mapped_constraint_map = append_marker_to_isl_map_var_names( + mapped_constraint_map, isl.dim_type.in_, marker="'") + +print("constraint_map after apply_domain:") +print(prettier_map_string(mapped_constraint_map)) + +statement_inames_mapped = set(["itx", "itt", "tt", "tparity", "tx"]) +sid_before = 0 +sid_after = 0 + +preprocessed_knl = preprocess_kernel(knl) +inames_domain_before_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +inames_domain_after_mapped = preprocessed_knl.get_inames_domain( + statement_inames_mapped) +print("(mapped) inames_domain_before:", inames_domain_before_mapped) +print("(mapped) inames_domain_after:", inames_domain_after_mapped) + +# ============================================= + +verbose = False +verbose = True + +# get a linearization to check +if preprocessed_knl.linearization is None: + linearized_knl = get_one_linearized_kernel(preprocessed_knl) +else: + linearized_knl = preprocessed_knl + +# {{{ verbose + +if verbose: + # Print kernel info ------------------------------------------------------ + print("="*80) + print("Kernel:") + print(linearized_knl) + #print(generate_code_v2(linearized_knl).device_code()) + print("="*80) + print("Iname tags: %s" % (linearized_knl.iname_to_tags)) + print("="*80) + print("Loopy linearization:") + for linearization_item in linearized_knl.linearization: + print(linearization_item) + + print("="*80) + print("inames_domain_before_mapped:", inames_domain_before_mapped) + print("inames_domain_after_mapped:", inames_domain_after_mapped) + +# }}} + + +conc_loop_inames, _ = get_concurrent_inames(linearized_knl) +# Create a mapping of {statement instance: lex point} +# including only instructions involved in this dependency +sched = PairwiseScheduleBuilder( + linearized_knl.linearization, + str(sid_before), + str(sid_after), + loops_to_ignore=conc_loop_inames, + ) + +# Get an isl map representing the PairwiseScheduleBuilder; +# this requires the iname domains + +# get a mapping from lex schedule id to relevant inames domain +sid_to_dom = { + sid_before: inames_domain_before_mapped, + sid_after: inames_domain_after_mapped, + } + +isl_sched_map_before, isl_sched_map_after = sched.build_maps(linearized_knl) + +# {{{ verbose + +if verbose: + print("sid_to_dom:\n", sid_to_dom) + print("PairwiseScheduleBuilder after creating isl map:") + print(sched) + print("LexSched:") + print(prettier_map_string(isl_sched_map_before)) + print(prettier_map_string(isl_sched_map_after)) + #print("space (statement instances -> lex time):") + #print(isl_sched_map.space) + #print("-"*80) + +# }}} + +# get map representing lexicographic ordering +sched_lex_order_map = sched.get_lex_order_map_for_sched_space() + +# {{{ verbose + +""" +if verbose: + print("sched lex order map:") + print(prettier_map_string(sched_lex_order_map)) + print("space (lex time -> lex time):") + print(sched_lex_order_map.space) + print("-"*80) +""" + +# }}} + +# create statement instance ordering, +# maps each statement instance to all statement instances occuring later +sio = get_statement_ordering_map( + isl_sched_map_before, + isl_sched_map_after, + sched_lex_order_map, + before_marker="'") + +# {{{ verbose + +if verbose: + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("-"*80) + +if verbose: + 
print("constraint map space (before aligning):") + print(constraint_map.space) + +# }}} + +# align constraint map spaces to match sio so we can compare them +aligned_constraint_map = ensure_dim_names_match_and_align(constraint_map, sio) + +# {{{ verbose + +if verbose: + print("constraint map space (after aligning):") + print(aligned_constraint_map.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + +# }}} + +assert aligned_constraint_map.space == sio.space +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.in_) + == sio.space.get_var_names(isl.dim_type.in_)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.out) + == sio.space.get_var_names(isl.dim_type.out)) +assert ( + aligned_constraint_map.space.get_var_names(isl.dim_type.param) + == sio.space.get_var_names(isl.dim_type.param)) + +linearization_is_valid = aligned_constraint_map.is_subset(sio) + +if not linearization_is_valid: + + # {{{ verbose + + if verbose: + print("================ constraint check failure =================") + print("constraint map not subset of SIO") + print("dependency:") + print(prettier_map_string(constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("constraint_map.gist(sio):") + print(aligned_constraint_map.gist(sio)) + print("sio.gist(constraint_map)") + print(sio.gist(aligned_constraint_map)) + print("loop priority known:") + print(preprocessed_knl.loop_priority) + """ + from loopy.schedule.checker.utils import ( + get_concurrent_inames, + ) + conc_inames, non_conc_inames = get_concurrent_inames(linearized_knl) + print("concurrent inames:", conc_inames) + print("sequential inames:", non_conc_inames) + print("constraint map space (stmt instances -> stmt instances):") + print(aligned_constraint_map.space) + print("SIO space (statement instances -> statement instances):") + print(sio.space) + print("constraint map:") + print(prettier_map_string(aligned_constraint_map)) + print("statement instance ordering:") + print(prettier_map_string(sio)) + print("{insn id -> sched sid int} dict:") + print(lp_insn_id_to_lex_sched_id) + """ + print("===========================================================") + + # }}} + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) + +''' +# (U_n^{k+1}-U_n^k)/dt = C*(U_{n+1}^k-U_n^k)/dx +# U_n^{k+1} = U_n^k + dt/dx*C*(U_{n+1}^k-U_n^k) +''' + +# Get stats ---------------------------------------------------------- + +""" +sc = KernelStatCollector( + evaluate_polys=False, + count_madds=False, # TODO enable after madd counting branch is merged + ) +#nx = 2**11 +#nt = 2**11 +nx = 2**5 +nt = 2**5 +param_dict = {"nx": nx, "nt": nt, "c": 1, "dt": 0.1, "dx": 0.1} +stat_list = [kso.WALL_TIME, kso.OP_MAP, kso.FLOP_RATE] +stats = sc.collect_stats(knl, stat_list, param_dict=param_dict) + +# Measured time + flop rate +time_measured = stats[kso.WALL_TIME] +#flop_rate_measured = stats[kso.FLOP_RATE] + +print("time:", time_measured) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? constraint map subset of SIO?") +print(linearization_is_valid) +""" + +""" +linearization_is_valid = lp.check_linearization_validity(knl, verbose=True) + +print("is linearization valid? 
constraint map subset of SIO?") +print(linearization_is_valid) +""" diff --git a/loopy/schedule/checker/lexicographic_order_map.py b/loopy/schedule/checker/lexicographic_order_map.py new file mode 100644 index 0000000000000000000000000000000000000000..b547e1d94689394642448de61274b1d52e0dbc89 --- /dev/null +++ b/loopy/schedule/checker/lexicographic_order_map.py @@ -0,0 +1,180 @@ +# coding: utf-8 +__copyright__ = "Copyright (C) 2019 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import islpy as isl + + +def get_statement_ordering_map( + sched_map_before, sched_map_after, lex_map, before_marker="'"): + """Return a mapping that maps each statement instance to + all statement instances occurring later. + + :arg sched_map_before: An :class:`islpy.Map` representing instruction + instance order for the dependee as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg sched_map_after: An :class:`islpy.Map` representing instruction + instance order for the depender as a mapping from each statement + instance to a point in the lexicographic ordering. + + :arg lex_map: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. E.g.:: + + {[i0', i1', i2', ...] -> [i0, i1, i2, ...] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2) ...} + + :returns: An :class:`islpy.Map` representing the lex schedule as + a mapping from each statement instance to all statement instances + occurring later. I.e., we compose relations B, L, and A as + B ∘ L ∘ A^-1, where B is sched_map_before, A is sched_map_after, + and L is the lexicographic ordering map. + + """ + + sio = sched_map_before.apply_range( + lex_map).apply_range(sched_map_after.reverse()) + # append marker to in names + from loopy.schedule.checker.utils import ( + append_marker_to_isl_map_var_names, + ) + return append_marker_to_isl_map_var_names( + sio, isl.dim_type.in_, before_marker) + + +def get_lex_order_constraint(before_names, after_names, islvars=None): + """Return a constraint represented as an :class:`islpy.Set` + defining a 'happens before' relationship in a lexicographic + ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before.
(see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :arg islvars: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + This dictionary defines the space to be used for the set. If no + value is passed, the dictionary will be made using ``before_names`` + and ``after_names``. + + :returns: An :class:`islpy.Set` representing a constraint that enforces a + lexicographic ordering. E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the set:: + + {[i0', i1', i2', i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + # If no islvars passed, make them using the names provided + if islvars is None: + islvars = isl.make_zero_and_vars(before_names+after_names, []) + + # Initialize constraint with i0' < i0 + lex_order_constraint = islvars[before_names[0]].lt_set(islvars[after_names[0]]) + + # Initialize conjunction constraint with True. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1)) + equality_constraint_conj = islvars[0].eq_set(islvars[0]) + + for i in range(1, len(before_names)): + + # Add the next equality constraint to equality_constraint_conj + equality_constraint_conj = equality_constraint_conj & \ + islvars[before_names[i-1]].eq_set(islvars[after_names[i-1]]) + + # Create a conjunction constraint by combining a less-than + # constraint for this dim, e.g., (i1' < i1), with the current + # equality constraint conjunction. + # For each dim d, starting with d=1, this conjunction will have d equalities, + # and one inequality, + # e.g., (i0' = i0 and i1' = i1 and ... i(d-1)' = i(d-1) and id' < id) + full_conj_constraint = islvars[before_names[i]].lt_set( + islvars[after_names[i]]) & equality_constraint_conj + + # Union this new constraint with the current lex_order_constraint + lex_order_constraint = lex_order_constraint | full_conj_constraint + + return lex_order_constraint + + +def create_lex_order_map( + n_dims, + before_names=None, + after_names=None, + ): + """Return a mapping that maps each point in a lexicographic + ordering to every point that occurs later in lexicographic + time. + + :arg n_dims: An :class:`int` representing the number of dimensions + in the lexicographic ordering. + + :arg before_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs before. (see example below) + + :arg after_names: A list of :class:`str` variable names representing + the lexicographic space dimensions for a point in lexicographic + time that occurs after. (see example below) + + :returns: An :class:`islpy.Map` representing a lexicographic + ordering as a mapping from each point in lexicographic time + to every point that occurs later in lexicographic time. 
+ E.g., if ``before_names = [i0', i1', i2']`` and + ``after_names = [i0, i1, i2]``, return the map:: + + {[i0', i1', i2'] -> [i0, i1, i2] : + i0' < i0 or (i0' = i0 and i1' < i1) + or (i0' = i0 and i1' = i1 and i2' < i2)} + + """ + + if after_names is None: + after_names = ["i%s" % (i) for i in range(n_dims)] + if before_names is None: + from loopy.schedule.checker.utils import ( + append_marker_to_strings, + ) + before_names = append_marker_to_strings(after_names, marker="'") + + assert len(before_names) == len(after_names) == n_dims + dim_type = isl.dim_type + + lex_order_constraint = get_lex_order_constraint(before_names, after_names) + + lex_map = isl.Map.from_domain(lex_order_constraint) + lex_map = lex_map.move_dims( + dim_type.out, 0, dim_type.in_, + len(before_names), len(after_names)) + + return lex_map diff --git a/loopy/schedule/checker/schedule.py b/loopy/schedule/checker/schedule.py index 6e1faf0a4d161c6bd31de68c585bf942dc8642ba..e5a6e3fd084c3affddb49aa3fdb577f08b637a41 100644 --- a/loopy/schedule/checker/schedule.py +++ b/loopy/schedule/checker/schedule.py @@ -84,6 +84,9 @@ class StatementRef(object): and self.int_id == other.int_id ) + #def __hash__(self): + # return hash(repr(self)) + def update_persistent_hash(self, key_hash, key_builder): """Custom hash computation function for use with :class:`pytools.persistent_dict.PersistentDict`. @@ -395,6 +398,24 @@ class PairwiseScheduleBuilder(object): def get_lex_var_names(self): return [LEX_VAR_PREFIX+str(i) for i in range(self.max_lex_dims())] + def get_lex_order_map_for_sched_space(self): + """Return an :class:`islpy.BasicMap` that maps each point in a + lexicographic ordering to every point that is + lexicographically greater. + """ + + from loopy.schedule.checker.lexicographic_order_map import ( + create_lex_order_map, + ) + n_dims = self.max_lex_dims() + return create_lex_order_map( + n_dims, after_names=self.get_lex_var_names()) + + def __eq__(self, other): + return ( + self.stmt_instance_before == other.stmt_instance_before + and self.stmt_instance_after == other.stmt_instance_after) + def __str__(self): def stringify_sched_stmt_instance(stmt_inst): diff --git a/loopy/schedule/checker/utils.py b/loopy/schedule/checker/utils.py index e862d166e17e8346d8fb87d02443ce4721280878..5c51b61b6418224176db447e727c901ac6082d63 100644 --- a/loopy/schedule/checker/utils.py +++ b/loopy/schedule/checker/utils.py @@ -132,6 +132,72 @@ def ensure_dim_names_match_and_align(obj_map, tgt_map): return aligned_obj_map +def append_marker_to_isl_map_var_names(old_isl_map, dim_type, marker="'"): + """Return an isl_map with marker appended to + dim_type dimension names. + + :arg old_isl_map: A :class:`islpy.Map`. + + :arg dim_type: A :class:`islpy.dim_type`, i.e., an :class:`int`, + specifying the dimension to be marked. + + :returns: A :class:`islpy.Map` matching `old_isl_map` with + ``marker`` appended to dim_type dimension names. + + """ + + new_map = old_isl_map.copy() + for i in range(len(old_isl_map.get_var_names(dim_type))): + new_map = new_map.set_dim_name(dim_type, i, old_isl_map.get_dim_name( + dim_type, i)+marker) + return new_map + + +def make_islvars_with_marker( + var_names_needing_marker, other_var_names, param_names=[], marker="'"): + """Return a dictionary from variable and parameter names + to :class:`islpy.PwAff` instances that represent each of + the variables and parameters, appending marker to + var_names_needing_marker.
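+
+    Example (an illustrative sketch):
+    ``make_islvars_with_marker(["s"], ["i"], marker="'")`` would return
+    islvars with keys ``s'``, ``i``, and ``0`` (a zero
+    :class:`islpy.PwAff` constant).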
+ + :arg var_names_needing_marker: A :class:`list` of :class:`str` + elements representing variable names to have markers appended. + + :arg other_var_names: A :class:`list` of :class:`str` + elements representing variable names to be included as-is. + + :arg param_names: A :class:`list` of :class:`str` elements + representing parameter names. + + :returns: A dictionary from variable names to :class:`islpy.PwAff` + instances that represent each of the variables + (islvars may be produced by `islpy.make_zero_and_vars`). The key + '0' is also include and represents a :class:`islpy.PwAff` zero constant. + + """ + + def append_marker(items, mark): + new_items = [] + for item in items: + new_items.append(item+mark) + return new_items + + return isl.make_zero_and_vars( + append_marker(var_names_needing_marker, marker) + + other_var_names, param_names) + + +def append_marker_to_strings(strings, marker="'"): + if not isinstance(strings, list): + raise ValueError("append_marker_to_strings did not receive a list") + else: + return [s+marker for s in strings] + + +def append_apostrophes(strings): + return append_marker_to_strings(strings, marker="'") + + def _get_union(list_items): union = list_items[0] for s in list_items[1:]: @@ -176,6 +242,7 @@ def create_symbolic_map_from_tuples( on these values. """ + # TODO clarify this with more comments # TODO allow None for domains dim_type = isl.dim_type @@ -302,6 +369,166 @@ def get_insn_id_from_linearization_item(linearization_item): return linearization_item.insn_id +# TODO for better performance, could combine these funcs so we don't +# loop over linearization more than once +def get_all_nonconcurrent_insn_iname_subsets( + knl, exclude_empty=False, non_conc_inames=None): + """Return a :class:`set` of every unique subset of non-concurrent + inames used in an instruction in a :class:`loopy.LoopKernel`. + + :arg knl: A :class:`loopy.LoopKernel`. + + :arg exclude_empty: A :class:`bool` specifying whether to + exclude the empty set. + + :arg non_conc_inames: A :class:`set` of non-concurrent inames + which may be provided if already known. + + :returns: A :class:`set` of every unique subset of non-concurrent + inames used in any instruction in a :class:`loopy.LoopKernel`. 
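+
+    Example (an illustrative sketch): for instructions nested within
+    inames ``{i, j}`` and ``{i, k}``, where ``j`` is concurrent and ``i``
+    and ``k`` are not, this returns
+    ``{frozenset({"i"}), frozenset({"i", "k"})}``.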
+ + """ + + if non_conc_inames is None: + _, non_conc_inames = get_concurrent_inames(knl) + + iname_subsets = set() + for insn in knl.instructions: + iname_subsets.add(insn.within_inames & non_conc_inames) + + if exclude_empty: + iname_subsets.discard(frozenset()) + + return iname_subsets + + +def get_linearization_item_ids_within_inames(knl, inames): + linearization_item_ids = set() + for insn in knl.instructions: + if inames.issubset(insn.within_inames): + linearization_item_ids.add(insn.id) + return linearization_item_ids + + +# TODO use yield to clean this up +# TODO use topological sort from loopy, then find longest path in dag +def _generate_orderings_starting_w_prefix( + allowed_after_dict, orderings, required_length=None, + start_prefix=(), return_first_found=False): + # alowed_after_dict = {str: set(str)} + # start prefix = tuple(str) + # orderings = set + if start_prefix: + next_items = allowed_after_dict[start_prefix[-1]]-set(start_prefix) + else: + next_items = allowed_after_dict.keys() + + if required_length: + if len(start_prefix) == required_length: + orderings.add(start_prefix) + if return_first_found: + return + else: + orderings.add(start_prefix) + if return_first_found: + return + + # return if no more items left + if not next_items: + return + + for next_item in next_items: + new_prefix = start_prefix + (next_item,) + _generate_orderings_starting_w_prefix( + allowed_after_dict, + orderings, + required_length=required_length, + start_prefix=new_prefix, + return_first_found=return_first_found, + ) + if return_first_found and orderings: + return + return + + +def get_orderings_of_length_n( + allowed_after_dict, required_length, return_first_found=False): + """Return all orderings found in tree represented by `allowed_after_dict`. + + :arg allowed_after_dict: A :class:`dict` mapping each :class:`string` + names to a :class:`set` of names that are allowed to come after + that name. + + :arg required_length: A :class:`int` representing the length required + for all orderings. Orderings not matching the required length will + not be returned. + + :arg return_first_found: A :class:`bool` specifying whether to return + the first valid ordering found. + + :returns: A :class:`set` of all orderings that are *explicitly* allowed + by the tree represented by `allowed_after_dict`. I.e., if we know + a->b and c->b, we don't know enough to return a->c->b. Note that + if the set for a dict key is empty, nothing is allowed to come after. + + """ + + orderings = set() + _generate_orderings_starting_w_prefix( + allowed_after_dict, + orderings, + required_length=required_length, + start_prefix=(), + return_first_found=return_first_found, + ) + return orderings + + +def create_graph_from_pairs(before_after_pairs): + # create key for every before + graph = dict([(before, set()) for before, _ in before_after_pairs]) + for before, after in before_after_pairs: + graph[before] = graph[before] | set([after, ]) + return graph + + +# only used for example purposes: + + +def create_explicit_map_from_tuples(tuple_pairs, space): + """Return a :class:`islpy.Map` in :class:`islpy.Space` space + mapping tup_in->tup_out for each `(tup_in, tup_out)` pair + in `tuple_pairs`, where `tup_in` and `tup_out` are + tuples of :class:`int` values to be assigned to the + corresponding dimension variables in `space`. 
+ + """ + + dim_type = isl.dim_type + individual_maps = [] + + for tup_in, tup_out in tuple_pairs: + constraints = [] + for i, val_in in enumerate(tup_in): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.in_, i, 1) + .set_constant_val(-1*val_in)) + for i, val_out in enumerate(tup_out): + constraints.append( + isl.Constraint.equality_alloc(space) + .set_coefficient_val(dim_type.out, i, 1) + .set_constant_val(-1*val_out)) + individual_maps.append( + isl.Map.universe(space).add_constraints(constraints)) + + union_map = individual_maps[0] + for m in individual_maps[1:]: + union_map = union_map.union(m) + + return union_map + + def get_EnterLoop_inames(linearization_items, knl): from loopy.schedule import EnterLoop loop_inames = set() diff --git a/loopy/tools.py b/loopy/tools.py index a1cd5e108a45ba60c71b3bb7a51f779b84172065..594496bf624fc1c8e444fac72012c55b8ecbe914 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -355,6 +355,17 @@ def empty_aligned(shape, dtype, order='C', n=64): # }}} +# {{{ get graph sources + +def get_graph_sources(graph): + sources = set(graph.keys()) + for non_sources in graph.values(): + sources -= non_sources + return sources + +# }}} + + # {{{ pickled container value class _PickledObject(object): diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index a20a798cfa35c64c0cbd7097b41824dda2a35a84..723ff4f36092be9b0b31593b99d6656a0ece26ef 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -76,6 +76,7 @@ def add_barrier(knl, insn_before="", insn_after="", id_based_on=None, mem_kind=mem_kind) new_knl = knl.copy(instructions=knl.instructions + [barrier_to_add]) + # TODO update with dependencies v2 new_knl = add_dependency(kernel=new_knl, insn_match=insn_after, depends_on="id:"+id) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 8432d59ec5b162f6e963abbeae3b2fcabe94cf27..0d18fdc8eac1af777e78871149e55fbc4eeb387d 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -33,6 +33,7 @@ from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record __doc__ = """ @@ -79,16 +80,12 @@ __doc__ = """ def set_loop_priority(kernel, loop_priority): from warnings import warn - warn("set_loop_priority is deprecated. Use prioritize_loops instead. " - "Attention: A call to set_loop_priority will overwrite any previously " - "set priorities!", DeprecationWarning, stacklevel=2) - - if isinstance(loop_priority, str): - loop_priority = tuple(s.strip() - for s in loop_priority.split(",") if s.strip()) - loop_priority = tuple(loop_priority) - - return kernel.copy(loop_priority=frozenset([loop_priority])) + warn("set_loop_priority is deprecated. Use constrain_loop_nesting instead. " + "Calling constrain_loop_nesting(kernel, must_nest=loop_priority). " + "Scheduler will now consider provided loop priority a required " + "(must_nest) constraint.", + DeprecationWarning, stacklevel=2) + return constrain_loop_nesting(kernel, must_nest=loop_priority) def prioritize_loops(kernel, loop_priority): @@ -102,15 +99,769 @@ def prioritize_loops(kernel, loop_priority): all calls to prioritize_loops together establish a partial order on the inames (see https://en.wikipedia.org/wiki/Partially_ordered_set). 
- :arg: an iterable of inames, or, for brevity, a comma-separated string of - inames + :arg loop_priority: an iterable of inames, or, for brevity, a + comma-separated string of inames """ - if isinstance(loop_priority, str): - loop_priority = tuple(s.strip() - for s in loop_priority.split(",") if s.strip()) - loop_priority = tuple(loop_priority) - return kernel.copy(loop_priority=kernel.loop_priority.union([loop_priority])) + from warnings import warn + warn("prioritize_loops is deprecated. Use constrain_loop_nesting instead. " + "Calling constrain_loop_nesting(kernel, must_nest=loop_priority). " + "Scheduler will now consider provided loop priority a required " + "(must_nest) constraint.", + DeprecationWarning, stacklevel=2) + return constrain_loop_nesting(kernel, must_nest=loop_priority) + + +class UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, iname): + return (iname not in self.inames if self.complement + else iname in self.inames) + + def contains_all(self, iname_set): + return (not (iname_set & self.inames) if self.complement + else iname_set.issubset(self.inames)) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." % (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + # make sure user-supplied nesting conforms to rules + # convert string representations of nestings to tuples of UnexpandedInameSets + + import re + + def raise_loop_nest_input_error(msg): + valid_prio_rules = ( + 'Valid `must_nest` description formats: ' + '"iname, iname, ..." or (str, str, str, ...), ' + 'where str can be of form ' + '"iname" or "{iname, iname, ...}". 
No set complements allowed.\n' + 'Valid `must_not_nest` description tuples must have len <= 2: ' + '"iname, iname", "iname, ~iname", or ' + '(str, str), where str can be of form ' + '"iname", "~iname", "{iname, iname, ...}", or "~{iname, iname, ...}".' + ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if iname_set_str_stripped[0] == "~": + # Make sure complement is allowed + if not complement_sets_allowed: + raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str and not ( + iname_set_str.startswith("~{") and + iname_set_str.endswith("}")): + raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid." + % (iname_set_str)) + + complement = True + else: + complement = False + + # remove leading/trailing tilde, braces, and space + iname_set_str_stripped = iname_set_str_stripped.strip("~{} ") + + # should be no remaining special characters besides comma and space + _error_on_regex_match(r'([^,\w ])', iname_set_str_stripped) + + # split by commas or spaces to get inames + inames = re.findall(r'([\w]+)(?:[ |,]*|$)', iname_set_str_stripped) + + # make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str_stripped)) + + return UnexpandedInameSet( + set([s.strip() for s in iname_set_str_stripped.split(",")]), + complement=complement) + + if isinstance(nesting, str): + # Enforce that priorities involving iname sets be passed as tuple + # Iname sets defined negatively with a single iname are allowed here + + # check for any special characters besides comma, space, and tilde + _error_on_regex_match(r'([^,\w~ ])', nesting) + + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting.split(",")) + else: + # nesting not passed as string + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting) + + # check max_tuple_size + if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s exceeds max tuple size %d." + % (nesting_as_tuple, max_tuple_size)) + + # make sure nesting has len > 1 + if len(nesting_as_tuple) <= 1: + raise_loop_nest_input_error( + "Loop nest prioritization tuple %s must have length > 1." + % (nesting_as_tuple)) + + return nesting_as_tuple + + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, # (UnexpandedInameSet, Unex..., ...)
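+        # e.g., for hypothetical inames, the must_nest tuple
+        # (UnexpandedInameSet({"i"}), UnexpandedInameSet({"j", "k"}))
+        # expands below into the pairs {("i", "j"), ("i", "k")}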
+ all_inames, + ): + + # First convert negatively defined iname sets to sets + positively_defined_iname_sets = [] + for iname_set in iname_sets_tuple: + positively_defined_iname_sets.append( + iname_set.get_inames_represented(all_inames)) + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + return loop_priority_pairs + + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." + % (must_not_nest_constraints, must_nest_graph)) + + +def constrain_loop_nesting( + kernel, must_nest=None, must_not_nest=None): + # TODO docstring + # TODO what if someone passes single-iname prio? + # TODO enforce that must_nest be a single tuple not list of tuples + # (or update implementation to allow list of tuples) + + # check for existing constraints + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + expand_must_priorities = set() + + if must_nest: + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + + # don't prioritize concurrent inames: + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if isinstance(kernel.iname_to_tag.get(iname, None), + ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." + % (iname, must_nest_tuple)) + + # Update must_nest graph + from pytools.graph import CycleError + try: + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + except CycleError: + raise ValueError( + "constrain_loop_nesting: Loop priority cycle detected. 
" + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest_tuple, must_nest_constraints_old)) + + # Check for inconsistent must_nest constraints by checking for cycle: + from pytools.graph import contains_cycle + if contains_cycle(must_nest_graph_new): + # TODO will this ever happen or does check above cover this? + raise ValueError( + "constrain_loop_nesting: Loop priority cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest_tuple, must_nest_constraints_old)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + + # check for conflicts with inames tagged 'vec' + from loopy.kernel.data import VectorizeTag + for iname, new_tag in six.iteritems(kernel.iname_to_tag): + if isinstance(new_tag, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # iname is not a leaf + raise ValueError( + "Iname %s tagged as 'vec', but loop priorities " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # TODO remove (TEMPORARY HACK TO KEEP LEGACY CODE RUNNING) + expand_must_priorities = _expand_iname_sets_in_tuple( + must_nest_tuple, kernel.all_inames()) + + # Prepare to update value for must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + else: + # no new must_nest constraints + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + if must_not_nest: + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # cycles are allowed in must_not_nest constraints, + # only need to check if incompatible with must_nest_constraints + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + must_pairs.extend(list(itertools.product([iname_before], inames_after))) + + if not check_must_not_nest(must_pairs, must_not_nest_tuple): + raise ValueError( + "constrain_loop_nesting: nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest constraints %s." 
+ % (must_not_nest_tuple, must_nest_constraints_new)) + + # prepare to update value for must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old | set([ + must_not_nest_tuple, ]) + else: + # no new must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old + + nest_constraints = LoopNestConstraints( + must_nest=must_nest_constraints_new, + must_not_nest=must_not_nest_constraints_new, + must_nest_graph=must_nest_graph_new, + ) + + return kernel.copy( + loop_priority=kernel.loop_priority.union(expand_must_priorities), + loop_nest_constraints=nest_constraints, + ) + + +def check_must_nest(all_loop_nests, must_nest, all_inames): + # in order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # TODO instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True + + +def check_must_not_nest(all_loop_nests, must_not_nest): + # recall that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + # Go thru each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + + +def check_all_must_not_nests(all_loop_nests, must_not_nests): + # recall that must_not_nest may only contain two tiers + for must_not_nest in must_not_nests: + if not check_must_not_nest(all_loop_nests, must_not_nest): + return False + return True + + +def is_loop_nesting_valid( + all_loop_nests, + must_nest_constraints, + must_not_nest_constraints, + all_inames): + + # check must-nest constraints + must_nest_valid = True + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + must_nest_valid = False + break + + # check must-not-nest constraints + must_not_nest_valid = True + if must_not_nest_constraints is not None: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + must_not_nest_valid = False + break + + return must_nest_valid and must_not_nest_valid + + +def update_must_nest_graph(must_nest_graph, must_nest, all_inames): + from copy import deepcopy + new_graph = deepcopy(must_nest_graph) + + # first, all inames must be a node in the graph: + for iname in all_inames: + if iname not in new_graph.keys(): + new_graph[iname] = set() + + # get (before, after) pairs: + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + + # update graph: + for before, after in must_nest_expanded: + new_graph[before].add(after) + + # compute transitive closure: + from pytools.graph import compute_transitive_closure + # TODO compute_transitive_closure now allows cycles; check for cycle separately + return compute_transitive_closure(new_graph) + + +def get_iname_nestings(outline): + from loopy.schedule import EnterLoop, LeaveLoop + # return a list of tuples representing deepest nestings + nestings = [] + 
current_tiers = []
+    already_exiting_loops = False
+    for outline_item in outline:
+        if isinstance(outline_item, EnterLoop):
+            already_exiting_loops = False
+            current_tiers.append(outline_item.iname)
+        elif isinstance(outline_item, LeaveLoop):
+            if not already_exiting_loops:
+                nestings.append(tuple(current_tiers))
+                already_exiting_loops = True
+            del current_tiers[-1]
+    return nestings
+
+
+def replace_inames_in_nest_constraints(
+        inames_to_replace, replacement_inames, old_constraints,
+        coalesce_duplicate_replacement_inames=False):
+    """
+    :arg inames_to_replace: A set of inames that may exist in
+        `old_constraints`, each of which is to be replaced with all inames
+        in `replacement_inames`.
+
+    :arg replacement_inames: A set of inames, all of which will replace each
+        iname in `inames_to_replace` in `old_constraints`.
+
+    :arg old_constraints: An iterable of tuples containing one or more
+        :class:`UnexpandedInameSet` objects.
+    """
+
+    # replace each iname in inames_to_replace
+    # with *all* inames in replacement_inames
+
+    # loop through old_constraints and handle each nesting independently
+    new_constraints = set()
+    for old_nesting in old_constraints:
+        # loop through each iname_set in this nesting and perform replacement
+        new_nesting = []
+        for iname_set in old_nesting:
+
+            # find inames to be replaced
+            inames_found = inames_to_replace & iname_set.inames
+
+            # create the new set of inames with the replacements
+            if inames_found:
+                new_inames = iname_set.inames - inames_found
+                new_inames.update(replacement_inames)
+            else:
+                new_inames = iname_set.inames.copy()
+
+            new_nesting.append(
+                UnexpandedInameSet(new_inames, iname_set.complement))
+
+        # if we've removed things, new_nesting might only contain 1 item,
+        # in which case it's meaningless and we should just remove it
+        if len(new_nesting) > 1:
+            new_constraints.add(tuple(new_nesting))
+
+    # When joining inames, we may need to coalesce:
+    # e.g., if we join `i` and `j` into `ij`, and old_nesting was
+    # [{i, k}, {j, h}], at this point we have [{ij, k}, {ij, h}],
+    # which contains a cycle. If coalescing is enabled, change this
+    # to [{k}, {ij}, {h}] to remove the cycle.
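+    # For illustration (hypothetical values): replacing {"i", "j"} with
+    # {"ij"} in the single must-nest constraint ({i, k}, {j, h}) with
+    # coalescing enabled should proceed as
+    #   ({i, k}, {j, h}) -> ({ij, k}, {ij, h}) -> ({k}, {ij}, {h}),
+    # pulling the duplicated replacement iname out into its own tier.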
+ if coalesce_duplicate_replacement_inames: + + def coalesce_duplicate_inames_in_nesting(nesting, iname_candidates): + # TODO would like this to be generic, but for now, assumes all + # UnexpandedInameSets have complement=False, which works if + # we're only using this for must_nest constraints since they + # cannot have complements + for iname_set in nesting: + assert not iname_set.complement + + import copy + # copy and convert nesting to list so we can modify + coalesced_nesting = list(copy.deepcopy(nesting)) + + # repeat coalescing step until we don't find any adjacent pairs + # containing duplicates (among iname_candidates) + found_duplicates = True + while found_duplicates: + found_duplicates = False + # loop through each iname_set in nesting and coalesce + # (assume new_nesting has at least 2 items) + i = 0 + while i < len(coalesced_nesting)-1: + iname_set_before = coalesced_nesting[i] + iname_set_after = coalesced_nesting[i+1] + # coalesce for each iname candidate + for iname in iname_candidates: + if (iname_set_before.inames == set([iname, ]) and + iname_set_after.inames == set([iname, ])): + # before/after contain single iname to be coalesced, + # -> remove iname_set_after + del coalesced_nesting[i+1] + found_duplicates = True + elif (iname_set_before.inames == set([iname, ]) and + iname in iname_set_after.inames): + # before contains single iname to be coalesced, + # after contains iname along with others, + # -> remove iname from iname_set_after.inames + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname_set_after.inames == set([iname, ])): + # after contains single iname to be coalesced, + # before contains iname along with others, + # -> remove iname from iname_set_before.inames + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + found_duplicates = True + elif (iname in iname_set_before.inames and + iname in iname_set_after.inames): + # before and after contain iname along with others, + # -> remove iname from iname_set_{before,after}.inames + # and insert it in between them + coalesced_nesting[i] = UnexpandedInameSet( + inames=iname_set_before.inames - set([iname, ]), + complement=iname_set_before.complement, + ) + coalesced_nesting[i+1] = UnexpandedInameSet( + inames=iname_set_after.inames - set([iname, ]), + complement=iname_set_after.complement, + ) + coalesced_nesting.insert(i+1, UnexpandedInameSet( + inames=set([iname, ]), + complement=False, + )) + found_duplicates = True + # else, iname was not found in both sets, so do nothing + i = i + 1 + + return tuple(coalesced_nesting) + + # loop through new_constraints; handle each nesting independently + coalesced_constraints = set() + for new_nesting in new_constraints: + coalesced_constraints.add( + coalesce_duplicate_inames_in_nesting( + new_nesting, replacement_inames)) + + return coalesced_constraints + else: + return new_constraints + + +def replace_inames_in_graph( + inames_to_replace, replacement_inames, old_graph): + # replace each iname in inames_to_replace with all inames in replacement_inames + + new_graph = {} + iname_to_replace_found_as_key = False + union_of_inames_after_for_replaced_keys = set() + for iname, inames_after in old_graph.items(): + # create new inames_after + new_inames_after = inames_after.copy() + inames_found = inames_to_replace & 
new_inames_after
+
+        if inames_found:
+            new_inames_after -= inames_found
+            new_inames_after.update(replacement_inames)
+
+        # update dict
+        if iname in inames_to_replace:
+            iname_to_replace_found_as_key = True
+            union_of_inames_after_for_replaced_keys = \
+                union_of_inames_after_for_replaced_keys | new_inames_after
+            # don't add this iname as a key in new graph
+        else:
+            new_graph[iname] = new_inames_after
+
+    # add replacement iname keys
+    if iname_to_replace_found_as_key:
+        for new_key in replacement_inames:
+            new_graph[new_key] = union_of_inames_after_for_replaced_keys.copy()
+
+    # check for cycle
+    from pytools.graph import contains_cycle
+    if contains_cycle(new_graph):
+        raise ValueError(
+            "replace_inames_in_graph: Loop priority cycle detected. "
+            "Cannot replace inames %s with inames %s."
+            % (inames_to_replace, replacement_inames))
+
+    return new_graph
+
+
+def replace_inames_in_all_nest_constraints(
+        knl, old_inames, new_inames,
+        coalesce_duplicate_replacement_inames=False,
+        pairs_that_must_not_violate_constraints=set(),
+        ):
+
+    # get old must_nest and must_not_nest
+    # (must_nest_graph will be rebuilt)
+    if knl.loop_nest_constraints:
+        old_must_nest = knl.loop_nest_constraints.must_nest
+        old_must_not_nest = knl.loop_nest_constraints.must_not_nest
+        # (these could still be None)
+    else:
+        old_must_nest = None
+        old_must_not_nest = None
+
+    if old_must_nest:
+        # check to make sure special pairs don't conflict with constraints
+        for iname_before, iname_after in pairs_that_must_not_violate_constraints:
+            if iname_before in knl.loop_nest_constraints.must_nest_graph[
+                    iname_after]:
+                raise ValueError(
+                    "Implied nestings violate existing must-nest constraints."
+                    "\nimplied nestings: %s\nmust-nest constraints: %s"
+                    % (pairs_that_must_not_violate_constraints, old_must_nest))
+
+        new_must_nest = replace_inames_in_nest_constraints(
+            old_inames, new_inames, old_must_nest,
+            coalesce_duplicate_replacement_inames,
+            )
+    else:
+        new_must_nest = None
+
+    if old_must_not_nest:
+        # check to make sure special pairs don't conflict with constraints
+        if not check_all_must_not_nests(
+                pairs_that_must_not_violate_constraints, old_must_not_nest):
+            raise ValueError(
+                "Implied nestings violate existing must-not-nest constraints."
+                "\nimplied nestings: %s\nmust-not-nest constraints: %s"
+                % (pairs_that_must_not_violate_constraints, old_must_not_nest))
+
+        new_must_not_nest = replace_inames_in_nest_constraints(
+            old_inames, new_inames, old_must_not_nest)
+        # each must-not-nest constraint may only contain two tiers
+        # TODO coalesce_duplicate_replacement_inames?
+    else:
+        new_must_not_nest = None
+
+    # Rebuild must_nest graph
+    if new_must_nest:
+        new_must_nest_graph = {}
+        new_all_inames = (
+            knl.all_inames() - set(old_inames)) | set(new_inames)
+        from pytools.graph import CycleError
+        for must_nest_tuple in new_must_nest:
+            try:
+                new_must_nest_graph = update_must_nest_graph(
+                    new_must_nest_graph, must_nest_tuple, new_all_inames)
+            except CycleError:
+                raise ValueError(
+                    "Loop priority cycle detected when replacing inames %s "
+                    "with inames %s. Previous must_nest constraints: %s"
+                    % (old_inames, new_inames, old_must_nest))
+
+        # check for cycle
+        from pytools.graph import contains_cycle
+        if contains_cycle(new_must_nest_graph):
+            # TODO will this ever happen or does check above cover this?
+            raise ValueError(
+                "Loop priority cycle detected when replacing inames %s "
+                "with inames %s. "
Previous must_nest constraints: %s" + % (old_inames, new_inames, old_must_nest)) + + # make sure none of the must_nest constraints violate must_not_nest + # this may not catch all problems + check_must_not_nest_against_must_nest_graph( + new_must_not_nest, new_must_nest_graph) + else: + new_must_nest_graph = None + + return knl.copy( + loop_nest_constraints=LoopNestConstraints( + must_nest=new_must_nest, + must_not_nest=new_must_not_nest, + must_nest_graph=new_must_nest_graph, + ) + ) # }}} @@ -295,12 +1046,17 @@ def _split_iname_backend(kernel, split_iname, new_prio = new_prio + (prio_iname,) new_priorities.append(new_prio) + # update must_nest, must_not_nest, and must_nest_graph + kernel = replace_inames_in_all_nest_constraints( + kernel, set([split_iname, ]), [inner_iname, outer_iname]) + kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, instructions=new_insns, applied_iname_rewrites=applied_iname_rewrites, - loop_priority=frozenset(new_priorities)) + loop_priority=frozenset(new_priorities), + ) rule_mapping_context = SubstitutionRuleMappingContext( kernel.substitutions, kernel.get_var_name_generator()) @@ -584,11 +1340,53 @@ def join_inames(kernel, inames, new_iname=None, tag=None, within=None): within_inames=subst_within_inames(insn.within_inames)) for insn in kernel.instructions] + # update must_nest, must_not_nest, and must_nest_graph + # (will fail if cycle is created in must-nest graph) + implied_nestings = set() + inames_orig_order = inames[::-1] # this was reversed + for i, iname_before in enumerate(inames_orig_order[:-1]): + for iname_after in inames_orig_order[i+1:]: + implied_nestings.add((iname_before, iname_after)) + kernel = replace_inames_in_all_nest_constraints( + kernel, set(inames), [new_iname], + coalesce_duplicate_replacement_inames=True, + pairs_that_must_not_voilate_constraints=implied_nestings, + ) + + # update legacy loop_priority + # TODO handle coalescing correctly here (until we remove old prios) + old_loop_priority = kernel.loop_priority + new_loop_priority = None + if old_loop_priority is not None: + new_loop_priority = set() + for old_tup in old_loop_priority: + new_tup = [] + for iname in old_tup: + if iname in inames: + # need to replace iname with new_iname + if new_iname in new_tup[:-1]: + # attempted to join inames with another iname + # in between, error + raise ValueError( + "cannot join inames (%s) involved in legacy " + "loop_priority if another iname is prioritized " + "between them. 
knl.loop_priority: %s" + % (inames, old_loop_priority)) + elif (not new_tup) or new_iname != new_tup[-1]: + new_tup.append(new_iname) + # (if new_iname == new_tup[-1], don't add it twice + else: + new_tup.append(iname) + if len(new_tup) > 1: + new_loop_priority.update([tuple(new_tup)]) + new_loop_priority = frozenset(new_loop_priority) + kernel = (kernel .copy( instructions=new_insns, domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict] + applied_iname_rewrites=kernel.applied_iname_rewrites + [subst_dict], + loop_priority=new_loop_priority, )) from loopy.match import parse_stack_match @@ -712,6 +1510,7 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] from loopy.kernel.data import (ConcurrentTag, ForceSequentialTag, + VectorizeTag, filter_iname_tags_by_type) # {{{ globbing @@ -751,10 +1550,21 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): if iname not in kernel.all_inames(): raise ValueError("cannot tag '%s'--not known" % iname) - if (isinstance(new_tag, ConcurrentTag) - and filter_iname_tags_by_type(old_tags, ForceSequentialTag)): - raise ValueError("cannot tag '%s' as parallel--" - "iname requires sequential execution" % iname) + if isinstance(new_tag, ConcurrentTag): + if filter_iname_tags_by_type(old_tags, ForceSequentialTag): + raise ValueError("cannot tag '%s' as parallel--" + "iname requires sequential execution" % iname) + + # if iname found in must_nest, fail + if kernel.loop_nest_constraints: + must_nest = kernel.loop_nest_constraints.must_nest + if must_nest: + for nesting in must_nest: + for iname_set in nesting: + if iname in iname_set.inames: + raise ValueError("cannot tag '%s' as concurrent--" + "iname involved in must-nest constraint %s." + % (iname, nesting)) if (isinstance(new_tag, ForceSequentialTag) and filter_iname_tags_by_type(old_tags, ConcurrentTag)): @@ -763,6 +1573,19 @@ def tag_inames(kernel, iname_to_tag, force=False, ignore_nonexistent=False): "(likely because of participation in a precompute or " "a reduction)" % iname) + if isinstance(new_tag, VectorizeTag): + # vec_inames will be nested innermost, + # check whether this conflicts with loop priorities + must_nest_graph = (kernel.loop_nest_constraints.must_nest_graph + if kernel.loop_nest_constraints else None) + if must_nest_graph and must_nest_graph.get(iname, set()): + # iname is not a leaf + raise ValueError( + "Loop priorities provided specify that iname %s nest " + "outside of inames %s, but vectorized inames " + "must nest innermost. Cannot tag %s with 'vec' tag." 
+ % (iname, must_nest_graph.get(iname, set()), iname)) + knl_iname_to_tags[iname] = old_tags | frozenset([new_tag]) return kernel.copy(iname_to_tags=knl_iname_to_tags) @@ -878,10 +1701,33 @@ def duplicate_inames(knl, inames, within, new_inames=None, suffix=None, from loopy.kernel.tools import DomainChanger domch = DomainChanger(knl, frozenset([old_iname])) + # update must_nest, must_not_nest, and must_nest_graph + # (don't remove any unused inames yet, that happens later) + knl = replace_inames_in_all_nest_constraints( + knl, set([old_iname, ]), [old_iname, new_iname]) + + # update legacy loop_priority + if knl.loop_priority: + new_loop_priority = [] + for iname_tuple in knl.loop_priority: + try: + idx = iname_tuple.index(old_iname) + new_tuple = list(iname_tuple) + new_tuple[idx] = new_iname + new_tuple = tuple(new_tuple) + except ValueError: + new_tuple = iname_tuple + new_loop_priority.append(new_tuple) + new_loop_priority = frozenset(new_loop_priority) + else: + new_loop_priority = knl.loop_priority + from loopy.isl_helpers import duplicate_axes knl = knl.copy( domains=domch.get_domains_with( - duplicate_axes(domch.domain, [old_iname], [new_iname]))) + duplicate_axes(domch.domain, [old_iname], [new_iname])), + loop_priority=new_loop_priority, + ) # }}} @@ -1112,6 +1958,15 @@ def rename_iname(knl, old_iname, new_iname, existing_ok=False, within=None): "--cannot rename" % new_iname) if does_exist: + + if knl.loop_nest_constraints and ( + knl.loop_nest_constraints.must_nest or + knl.loop_nest_constraints.must_not_nest or + knl.loop_nest_constraints.must_nest_graph): + raise NotImplementedError( + "rename_iname() does not yet handle new loop nest " + "constraints when does_exist=True.") + # {{{ check that the domains match up dom = knl.get_inames_domain(frozenset((old_iname, new_iname))) @@ -1236,6 +2091,9 @@ def remove_unused_inames(knl, inames=None): # }}} + # now need to remove inames from loop priorities + knl = replace_inames_in_all_nest_constraints(knl, unused_inames, []) + return knl @@ -1733,6 +2591,8 @@ def make_reduction_inames_unique(kernel, inames=None, within=None): # {{{ duplicate the inames + # TODO need to update inames in priorities + for old_iname, new_iname in r_uniq.old_to_new: from loopy.kernel.tools import DomainChanger domch = DomainChanger(kernel, frozenset([old_iname])) diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index e6ecb4093ad24ceafe521c5379f4d2cd96ea6f52..93848ed78a0ad7ad62c39052de8639743981dd9b 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -118,6 +118,12 @@ def add_dependency(kernel, insn_match, depends_on): return result + +def add_dependencies_v2(knl, new_dependencies): + # TODO implement this function + assert isinstance(new_dependencies, frozenset) + return knl.copy(dependencies=frozenset(knl.dependencies | new_dependencies)) + # }}} diff --git a/test/test_apps.py b/test/test_apps.py index 71029cc9ce408f8e7fa95eaf3b766864c4beee5b..6c201e7770aa5b44d8a5eb126a050272a2168061 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -586,16 +586,14 @@ def test_poisson_fem(ctx_factory): ref_knl = knl - knl = lp.prioritize_loops(knl, ["c", "j", "i", "k"]) + knl = lp.prioritize_loops(knl, ["c", "j", "k", "i"]) def variant_1(knl): knl = lp.precompute(knl, "dpsi", "i,k,ell", default_tag='for') - knl = lp.prioritize_loops(knl, "c,i,j") return knl def variant_2(knl): knl = lp.precompute(knl, "dpsi", "i,ell", default_tag='for') - knl = lp.prioritize_loops(knl, "c,i,j") return knl def 
add_types(knl):

diff --git a/test/test_linearization_checker.py b/test/test_linearization_checker.py
index 520efba9b678f730474433baf92df8d397469f79..2989005610c8b6ec9d8719b2959c3f18ce7e4a0a 100644
--- a/test/test_linearization_checker.py
+++ b/test/test_linearization_checker.py
@@ -31,11 +31,14 @@ from pyopencl.tools import (  # noqa
         as pytest_generate_tests)
 from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2  # noqa
 import logging
+from loopy.kernel import KernelState
 from loopy import (
     preprocess_kernel,
     get_one_linearized_kernel,
 )
 
+lp.set_caching_enabled(False)
+
 logger = logging.getLogger(__name__)
 
 try:
@@ -46,7 +49,9 @@ else:
     faulthandler.enable()
 
 
-def test_lexschedule_and_map_creation():
+# {{{ test PairwiseScheduleBuilder and map creation
+
+def test_pairwise_schedule_and_map_creation():
     import islpy as isl
     from loopy.schedule.checker import (
         get_schedule_for_statement_pair,
@@ -400,6 +405,697 @@ def test_lexschedule_and_map_creation():
         # insn_d was linearized first, check schedule accordingly
         perform_insn_cd_checks_with(1, 0)
 
+# }}}
+
+
+# {{{ test statement instance ordering creation
+
+def test_statement_instance_ordering_creation():
+    import islpy as isl
+    from loopy.schedule.checker import (
+        get_schedule_for_statement_pair,
+    )
+    from loopy.schedule.checker.utils import (
+        ensure_dim_names_match_and_align,
+        append_marker_to_isl_map_var_names,
+    )
+    from loopy.schedule.checker.lexicographic_order_map import (
+        get_statement_ordering_map,
+    )
+
+    # example kernel (add deps to fix loop order)
+    knl = lp.make_kernel(
+        [
+            "{[i]: 0<=i<pi}",
+            "{[k]: 0<=k<pk}",
+            "{[j]: 0<=j<pj}",
+            "{[t]: 0<=t<pt}",
+        ],
+        """
+        for i
+            for k
+                <>temp = b[i,k] {id=insn_a}
+            end
+            for j
+                a[i,j] = temp + 1 {id=insn_b,dep=insn_a}
+                c[i,j] = d[i,j] {id=insn_c,dep=insn_b}
+            end
+        end
+        for t
+            e[t] = f[t] {id=insn_d, dep=insn_c}
+        end
+        """,
+        name="example",
+        assumptions="pi,pj,pk,pt >= 1",
+        lang_version=(2018, 2)
+        )
+    knl = lp.add_and_infer_dtypes(
+        knl,
+        {"b": np.float32, "d": np.float32, "f": np.float32})
+    knl = lp.prioritize_loops(knl, "i,k")
+    knl = lp.prioritize_loops(knl, "i,j")
+
+    # get a linearization
+    knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    def check_sio_for_insn_pair(
+            insn_id_before,
+            insn_id_after,
+            expected_lex_order_map,
+            expected_sio,
+            ):
+
+        sched_builder = get_schedule_for_statement_pair(
+            knl,
+            linearization_items,
+            insn_id_before,
+            insn_id_after,
+            )
+
+        # Get two isl maps from the PairwiseScheduleBuilder
+        sched_map_before, sched_map_after = sched_builder.build_maps(knl)
+
+        # get map representing lexicographic ordering
+        sched_lex_order_map = sched_builder.get_lex_order_map_for_sched_space()
+
+        assert sched_lex_order_map == expected_lex_order_map
+
+        # create statement instance ordering,
+        # maps each statement instance to all statement instances occurring later
+        sio = get_statement_ordering_map(
+            sched_map_before,
+            sched_map_after,
+            sched_lex_order_map,
+            )
+
+        sio_aligned = ensure_dim_names_match_and_align(sio, expected_sio)
+
+        assert sio_aligned == expected_sio
+
+    expected_lex_order_map = isl.Map("{ "
+        "[_lp_linchk_l0', _lp_linchk_l1', _lp_linchk_l2', _lp_linchk_l3', "
+        "_lp_linchk_l4']"
+        " -> "
+        "[_lp_linchk_l0, _lp_linchk_l1, _lp_linchk_l2, _lp_linchk_l3, "
+        "_lp_linchk_l4]"
+        ":"
+        "("
+        "_lp_linchk_l0' < _lp_linchk_l0 "
+        ") or ("
+
"_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3' < _lp_linchk_l3 " + ") or (" + "_lp_linchk_l0'= _lp_linchk_l0 and " + "_lp_linchk_l1'= _lp_linchk_l1 and " + "_lp_linchk_l2'= _lp_linchk_l2 and " + "_lp_linchk_l3'= _lp_linchk_l3 and " + "_lp_linchk_l4' < _lp_linchk_l4" + ")" + "}") + + # Isl ignores these apostrophes, but test would still pass since it ignores + # variable names when checking for equality. Even so, explicitly add apostrophes + # for sanity. + expected_lex_order_map = append_marker_to_isl_map_var_names( + expected_lex_order_map, isl.dim_type.in_, "'") + + # Relationship between insn_a and insn_b --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_b", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj and 0 <= i < pi and i > i'; " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= j < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_a and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pk] -> { " + "[_lp_linchk_statement'=0, i', k'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= k' < pk and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_a", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_c --------------------------------------- + + expected_sio = isl.Map( + "[pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i, j]:" + "0 <= i' < pi and 0 <= j' < pj and i > i' and 0 <= i < pi and 0 <= j < pj; " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j]:" + "0 <= i' < pi and 0 <= j' < pj and j > j' and 0 <= j < pj; " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, i=i', j=j']:" + "0 <= i' < pi and 0 <= j' < pj " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_c", expected_lex_order_map, expected_sio) + + # Relationship between insn_b and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl 
ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_b", "insn_d", expected_lex_order_map, expected_sio) + + # Relationship between insn_c and insn_d --------------------------------------- + + expected_sio = isl.Map( + "[pt, pi, pj] -> { " + "[_lp_linchk_statement'=0, i', j'] -> [_lp_linchk_statement=1, t]:" + "0 <= i' < pi and 0 <= j' < pj and 0 <= t < pt " + "}" + ) + # isl ignores these apostrophes, so explicitly add them + expected_sio = append_marker_to_isl_map_var_names( + expected_sio, isl.dim_type.in_, "'") + + check_sio_for_insn_pair( + "insn_c", "insn_d", expected_lex_order_map, expected_sio) + +# }}} + + +def test_linearization_checker_with_loop_prioritization(): + knl = lp.make_kernel( + [ + "{[i]: 0<=itemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_matmul(): + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + + unprocessed_knl = knl.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl = lp.add_dependencies_v2( # pylint:disable=no-member + knl, deps) + + # get a linearization to check + if knl.state < KernelState.PREPROCESSED: + knl = preprocess_kernel(knl) + knl = get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_linearization_checker_with_nop(): + knl = lp.make_kernel( + [ + "{[b]: 
b_start<=b<b_end}",
+            "{[c]: c_start<=c<c_end}",
+        ],
+        """
+        for b
+            <> c_end = 2
+            for c
+                ... nop
+            end
+        end
+        """,
+        "...",
+        seq_dependencies=True)
+    knl = lp.fix_parameters(knl, dim=3)
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_with_multi_domain():
+    knl = lp.make_kernel(
+        [
+            "{[i]: 0<=i<ni}",
+            "{[j]: 0<=j<nj}",
+            "{[k]: 0<=k<nk}",
+            "{[x,xx]: 0<=x,xx<nx}",
+        ],
+        """
+        for x,xx
+            for i
+                <>acc = 0 {id=insn0}
+                for j
+                    for k
+                        acc = acc + j + k {id=insn1,dep=insn0}
+                    end
+                end
+            end
+        end
+        """,
+        name="nest_multi_dom",
+        assumptions="ni,nj,nk,nx >= 1",
+        lang_version=(2018, 2)
+        )
+    knl = lp.prioritize_loops(knl, "x,xx,i")
+    knl = lp.prioritize_loops(knl, "i,j")
+    knl = lp.prioritize_loops(knl, "j,k")
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_with_loop_carried_deps():
+    knl = lp.make_kernel(
+        "{[i]: 0<=i<n}",
+        """
+        <>acc0 = 0 {id=insn0}
+        for i
+            acc0 = acc0 + i {id=insn1,dep=insn0}
+            <>acc2 = acc0 + i {id=insn2,dep=insn1}
+            <>acc3 = acc2 + i {id=insn3,dep=insn2}
+            <>acc4 = acc0 + i {id=insn4,dep=insn1}
+        end
+        """,
+        name="loop_carried_deps",
+        assumptions="n >= 1",
+        lang_version=(2018, 2)
+        )
+
+    unprocessed_knl = knl.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl, deps)
+
+    # get a linearization to check
+    if knl.state < KernelState.PREPROCESSED:
+        knl = preprocess_kernel(knl)
+    knl = get_one_linearized_kernel(knl)
+    linearization_items = knl.linearization
+
+    linearization_is_valid = lp.check_linearization_validity(
+        unprocessed_knl, deps, linearization_items)
+    assert linearization_is_valid
+
+
+def test_linearization_checker_and_invalid_priority_detection():
+    ref_knl = lp.make_kernel(
+        [
+            "{[h]: 0<=h<nh}",
+            "{[i]: 0<=i<ni}",
+            "{[j]: 0<=j<nj}",
+            "{[k]: 0<=k<nk}",
+        ],
+        """
+        <> acc = 0
+        for h,i,j,k
+            acc = acc + h + i + j + k
+        end
+        """,
+        name="priorities",
+        assumptions="ni,nj,nk,nh >= 1",
+        lang_version=(2018, 2)
+        )
+
+    # no error:
+    knl0 = lp.prioritize_loops(ref_knl, "h,i")
+    knl0 = lp.prioritize_loops(knl0, "i,j")
+    knl0 = lp.prioritize_loops(knl0, "j,k")
+
+    unprocessed_knl = knl0.copy()
+
+    deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl)
+    if hasattr(lp, "add_dependencies_v2"):
+        # TODO update this after dep refactoring
+        knl0 = lp.add_dependencies_v2(  # pylint:disable=no-member
+            knl0, deps)
+
+    # get a linearization to check
+    if knl0.state < KernelState.PREPROCESSED:
+        knl0 = preprocess_kernel(knl0)
+    knl0 = get_one_linearized_kernel(knl0)
+    linearization_items = knl0.linearization
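+    # note: prioritize_loops now records these chains as must-nest
+    # constraints, and "h,i", "i,j", "j,k" combine into the acyclic
+    # must-nest graph h -> i -> j -> k, so linearization should succeed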
+ + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # no error: + knl1 = lp.prioritize_loops(ref_knl, "h,i,k") + knl1 = lp.prioritize_loops(knl1, "h,j,k") + + unprocessed_knl = knl1.copy() + + deps = lp.create_dependencies_from_legacy_knl(unprocessed_knl) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl1 = lp.add_dependencies_v2( # pylint:disable=no-member + knl1, deps) + + # get a linearization to check + if knl1.state < KernelState.PREPROCESSED: + knl1 = preprocess_kernel(knl1) + knl1 = get_one_linearized_kernel(knl1) + linearization_items = knl1.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # error (cycle): + knl2 = lp.prioritize_loops(ref_knl, "h,i,j") + knl2 = lp.prioritize_loops(knl2, "j,k") + # TODO think about when legacy deps should be updated based on prio changes + + try: + if hasattr(lp, "constrain_loop_nesting"): + knl2 = lp.constrain_loop_nesting(knl2, "k,i") # pylint:disable=no-member + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + else: + knl2 = lp.prioritize_loops(knl2, "k,i") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl2) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl2 = lp.add_dependencies_v2( # pylint:disable=no-member + knl2, deps) + + unprocessed_knl = knl2.copy() + + # get a linearization to check + if knl2.state < KernelState.PREPROCESSED: + knl2 = preprocess_kernel(knl2) + knl2 = get_one_linearized_kernel(knl2) + linearization_items = knl2.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + # should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + + # error (inconsistent priorities): + knl3 = lp.prioritize_loops(ref_knl, "h,i,j,k") + # TODO think about when legacy deps should be updated based on prio changes + try: + if hasattr(lp, "constrain_loop_nesting"): + knl3 = lp.constrain_loop_nesting( # pylint:disable=no-member + knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + else: + knl3 = lp.prioritize_loops(knl3, "h,j,i,k") + + # legacy deps depend on priorities, so update deps using new knl + deps = lp.create_dependencies_from_legacy_knl(knl3) + if hasattr(lp, "add_dependencies_v2"): + # TODO update this after dep refactoring + knl3 = lp.add_dependencies_v2( # pylint:disable=no-member + knl3, deps) + + unprocessed_knl = knl3.copy() + + # get a linearization to check + if knl3.state < KernelState.PREPROCESSED: + knl3 = preprocess_kernel(knl3) + knl3 = get_one_linearized_kernel(knl3) + linearization_items = knl3.linearization + + linearization_is_valid = lp.check_linearization_validity( + unprocessed_knl, deps, linearization_items) + 
# should raise error + assert False + except ValueError as e: + if hasattr(lp, "constrain_loop_nesting"): + assert "cycle detected" in str(e) + else: + assert "invalid priorities" in str(e) + +# TODO create more kernels with invalid linearizations to test linearization checker + if __name__ == "__main__": if len(sys.argv) > 1: diff --git a/test/test_loopy.py b/test/test_loopy.py index 61a3f167be66f1c99adc3a52473d8edc747479e1..ecb8e6ff0567d4ebd61c158d3acfd7f19314e92f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -55,6 +55,1326 @@ __all__ = [ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa +def test_new_loop_priority_backward_compatibility(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,ktemp = b[i,k] {id=insn_a} + end + for j + a[i,j] = temp + 1 {id=insn_b,dep=insn_a} + c[i,j] = d[i,j] {id=insn_c} + end + end + for t + e[t] = f[t] {id=insn_d} + end + """, + name="example", + assumptions="pi,pj,pk,pt >= 1", + lang_version=(2018, 2) + ) + knl = lp.add_and_infer_dtypes( + knl, + {"b": np.float32, "d": np.float32, "f": np.float32}) + knl = lp.prioritize_loops(knl, "i,k") + knl = lp.prioritize_loops(knl, "i,j") + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # matmul ------- + bsize = 16 + knl = lp.make_kernel( + "{[i,k,j]: 0<=i {[i,j]: 0<=i {[i]: 0<=i xi = qpts[1, i2] + <> s = 1-xi + <> r = xi/s + <> aind = 0 {id=aind_init} + for alpha1 + <> w = s**(deg-alpha1) {id=init_w} + for alpha2 + tmp[el,alpha1,i2] = tmp[el,alpha1,i2] + w * coeffs[aind] \ + {id=write_tmp,dep=init_w:aind_init} + w = w * r * ( deg - alpha1 - alpha2 ) / (1 + alpha2) \ + {id=update_w,dep=init_w:write_tmp} + aind = aind + 1 \ + {id=aind_incr,dep=aind_init:write_tmp:update_w} + end + end + end + """, + [lp.GlobalArg("coeffs", None, shape=None), "..."], + name="stroud_bernstein_orig", assumptions="deg>=0 and nels>=1") + knl = lp.add_and_infer_dtypes(knl, + dict(coeffs=np.float32, qpts=np.int32)) + knl = lp.fix_parameters(knl, nqp1d=7, deg=4) + knl = lp.split_iname(knl, "el", 16, inner_tag="l.0") + knl = lp.split_iname(knl, "el_outer", 2, outer_tag="g.0", + inner_tag="ilp", slabs=(0, 1)) + knl = lp.tag_inames(knl, dict(i2="l.1", alpha1="unr", alpha2="unr")) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # nop ------- + knl = lp.make_kernel( + [ + "{[b]: b_start<=b c_end = 2 + for c + ... 
nop + end + end + """, + "...", + seq_dependencies=True) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # multi_domain ------- + knl = lp.make_kernel( + [ + "{[i]: 0<=iacc = 0 {id=insn0} + for j + for k + acc = acc + j + k {id=insn1,dep=insn0} + end + end + end + end + """, + name="nest_multi_dom", + assumptions="ni,nj,nk,nx >= 1", + lang_version=(2018, 2) + ) + knl = lp.prioritize_loops(knl, "x,xx,i") + knl = lp.prioritize_loops(knl, "i,j") + knl = lp.prioritize_loops(knl, "j,k") + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + # loop_carried_deps ------- + knl = lp.make_kernel( + "{[i]: 0<=iacc0 = 0 {id=insn0} + for i + acc0 = acc0 + i {id=insn1,dep=insn0} + <>acc2 = acc0 + i {id=insn2,dep=insn1} + <>acc3 = acc2 + i {id=insn3,dep=insn2} + <>acc4 = acc0 + i {id=insn4,dep=insn1} + end + """, + name="loop_carried_deps", + assumptions="n >= 1", + lang_version=(2018, 2) + ) + + unprocessed_knl = knl.copy() + + deps = create_dependencies_from_legacy_knl( + unprocessed_knl) + knl = lp.add_dependencies_v2(knl, deps) + + # get a linearization to check + knl = lp.preprocess_kernel(knl) + knl = lp.get_one_linearized_kernel(knl) + linearization_items = knl.linearization + + linearization_is_valid = check_linearization_validity( + unprocessed_knl, deps, linearization_items) + assert linearization_is_valid + + +def test_iname_coalescing_in_loop_nest_constraints(): + lp.set_caching_enabled(False) + # without ^this, changing these tests has no effect (cached version gets used) + + def get_sets_of_inames(iname_sets_tuple, iname_universe): + # convert UnexpandedInameSets to sets + sets_of_inames = [] + for iname_set in iname_sets_tuple: + sets_of_inames.append( + iname_set.get_inames_represented(iname_universe)) + return sets_of_inames + + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k]: 0<=g,h,i,j,k<1024 }", + ''' + out[g,h,i,j,k] = 2*a[g,h,i,j,k] {id=insn} + ''', + ) + # (join_inames errors if domain bound is variable) + + ref_knl = lp.add_and_infer_dtypes(ref_knl, {"a": np.dtype(np.float32)}) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "h", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert 
new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j}", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "{h, j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "{g, h}", "j", "k"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", ]), set(["k", ])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "{h, j, k}"), + ) + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + new_must_nest = get_sets_of_inames( + list(knl.loop_nest_constraints.must_nest)[0], knl.all_inames()) + expected_must_nest = [ + set(["i", ]), set(["gh", ]), set(["j", "k"])] + assert new_must_nest == expected_must_nest + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("i", "g", "j", "h", "k"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, g}", "j", "{h, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "contains cycle" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_nest=("{i, h}", "j", "{g, k}"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "nestings violate existing must-nest" in str(e) + + knl = ref_knl + knl = lp.constrain_loop_nesting( + knl, + must_not_nest=("g", "h"), + ) + try: + knl = lp.join_inames(knl, inames=["g", "h"], new_iname="gh") + assert False + except ValueError as e: + assert "nestings violate existing must-not-nest" in str(e) + + def test_globals_decl_once_with_multi_subprogram(ctx_factory): ctx = ctx_factory() queue = cl.CommandQueue(ctx) @@ -705,8 +2025,16 @@ def test_ilp_loop_bound(ctx_factory): ref_knl = knl - knl = lp.prioritize_loops(knl, "j,i,k") + # before new loop nest constraints/scheduling, + # prioritized inames could also be parallel: + #knl = lp.prioritize_loops(knl, "j,i,k") + #knl = lp.split_iname(knl, "k", 4, inner_tag="ilp") + + # after new loop nest constraints/scheduling... + # TODO this produces a different linearization, something wrong with ILP? 
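+    # must_nest=("i", "j") requires the i loop to nest outside the j loop;
+    # k_outer can only be constrained after the split below creates it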
+ knl = lp.constrain_loop_nesting(knl, must_nest=("i", "j")) knl = lp.split_iname(knl, "k", 4, inner_tag="ilp") + knl = lp.constrain_loop_nesting(knl, must_nest=("i", "k_outer")) lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict( @@ -742,7 +2070,6 @@ def test_slab_decomposition_does_not_double_execute(ctx_factory): knl = ref_knl knl = lp.split_iname(knl, "i", 4, slabs=(0, 1), inner_tag="unr", outer_tag=outer_tag) - knl = lp.prioritize_loops(knl, "i_outer") a = cl.array.empty(queue, 20, np.float32) a.fill(17) diff --git a/test/test_numa_diff.py b/test/test_numa_diff.py index 54b608a183840cc5d33f1e738f36fc605d16d94a..0ff44bf86b8755e17b0144b4a57f1e0092353218 100644 --- a/test/test_numa_diff.py +++ b/test/test_numa_diff.py @@ -51,6 +51,8 @@ from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa @pytest.mark.parametrize("Nq", [7]) @pytest.mark.parametrize("opt_level", [11]) def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa + # TODO linearization search for this kernel finds dead ends, no linearization + 1/0 # TODO this prevents this test from running forever, remove when fixed ctx = ctx_factory() filename = os.path.join(os.path.dirname(__file__), "strongVolumeKernels.f90") @@ -74,7 +76,6 @@ def test_gnuma_horiz_kernel(ctx_factory, ilp_multiple, Nq, opt_level): # noqa set_q_storage_format, set_D_storage_format) hsv = lp.fix_parameters(hsv, Nq=Nq) - hsv = lp.prioritize_loops(hsv, "e,k,j,i") hsv = lp.tag_inames(hsv, dict(e="g.0", j="l.1", i="l.0")) hsv = lp.assume(hsv, "elements >= 1") diff --git a/test/test_transform.py b/test/test_transform.py index ffef893b05fbca5a0d244ff17f379e1bb5cf27a1..ebf6b36f062942440886a6713a30f96ef7c51622 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -66,7 +66,6 @@ def test_chunk_iname(ctx_factory): ref_knl = knl knl = lp.chunk_iname(knl, "i", 3, inner_tag="l.0") - knl = lp.prioritize_loops(knl, "i_outer, i_inner") lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters=dict(n=130)) @@ -445,7 +444,9 @@ def test_precompute_with_preexisting_inames(ctx_factory): knl = lp.precompute(knl, "D2_subst", "i,k", default_tag="for", precompute_inames="ii,jj") - knl = lp.prioritize_loops(knl, "ii,jj,e,j,k") + knl = lp.prioritize_loops(knl, "ii,jj") + knl = lp.prioritize_loops(knl, "e,j") + knl = lp.prioritize_loops(knl, "e,k") lp.auto_test_vs_ref( ref_knl, ctx, knl,