diff --git a/MEMO b/MEMO
index 05eaf2de75a810bd9ff726646ac8ae53d56bd382..c13497a091cdbcd4e70448f9277e56bceadcb977 100644
--- a/MEMO
+++ b/MEMO
@@ -36,19 +36,17 @@ Things to consider
 
 - Loopy as a data model for implementing custom rewritings
 
+- We won't generate WAW (write-after-write) barrier-needing
+  dependencies from one instruction to itself.
+
 To-do
 ^^^^^
 
 - Just touching a variable written to by a non-idempotent
   instruction makes that instruction also not idempotent
 
-- assert dependencies <= parent_inames in loopy/__init__.py
-  ???
-
 - user interface for dim length prescription
 
-- Way too many barriers in SEM test.
-
 - Deal with equality constraints.
   (These arise, e.g., when partitioning a loop of length 16 into 16s.)
 
@@ -91,6 +89,9 @@ Future ideas
 Dealt with
 ^^^^^^^^^^
 
+- assert dependencies <= parent_inames in loopy/__init__.py
+  -> Yes, this must be the case.
+
 - Give a good error message if a parameter assignment in get_problems()
   is missing.
 
diff --git a/loopy/__init__.py b/loopy/__init__.py
index 46f6a76555afcf51c65cb4b2fcc7d7283fbf397a..f0fe3f0fda31cbc07d0d3b295dc21d8c0d12f011 100644
--- a/loopy/__init__.py
+++ b/loopy/__init__.py
@@ -234,8 +234,7 @@ def realize_cse(kernel, cse_tag, dtype, duplicate_inames=[], parallel_inames=Non
         dependencies = IndexVariableFinder(
                 include_reduction_inames=False)(expr.child)
 
-        # FIXME: can happen with
-        # assert dependencies <= parent_inames
+        assert dependencies <= parent_inames
 
         for iname in parent_inames:
             if iname in duplicate_inames:
diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py
index 788625e0ee7cf6e0229cdf30a7b2776bacdfbad6..f4e1d1c0a619ae6fdcd863bc30c719f43b1de57e 100644
--- a/loopy/codegen/control.py
+++ b/loopy/codegen/control.py
@@ -55,8 +55,14 @@ def generate_code_for_sched_index(kernel, sched_index, codegen_state):
     elif isinstance(sched_item, Barrier):
         from loopy.codegen import GeneratedInstruction
         from cgen import Statement as S
+
+        if sched_item.comment:
+            comment = " /* %s */" % sched_item.comment
+        else:
+            comment = ""
+
         return GeneratedInstruction(
-                ast=S("barrier(CLK_LOCAL_MEM_FENCE)"),
+                ast=S("barrier(CLK_LOCAL_MEM_FENCE)%s" % comment),
                 implemented_domain=None)
 
     elif isinstance(sched_item, RunInstruction):
diff --git a/loopy/kernel.py b/loopy/kernel.py
index 1cc356f77d4d19a4c9c334a1de2bce49a83b4f63..db1a326a7c79c2be2527eb7879da1bab597e80ce 100644
--- a/loopy/kernel.py
+++ b/loopy/kernel.py
@@ -310,6 +310,12 @@ class Instruction(Record):
 
         return result
 
+    @memoize_method
+    def get_read_var_names(self):
+        """Return the set of names DependencyMapper finds in self.expression."""
+        from loopy.symbolic import DependencyMapper as DepMap
+        return set(v.name for v in DepMap(composite_leaves=False)(self.expression))
+
 # }}}
 
 # {{{ reduction operations
@@ -774,6 +780,13 @@ class LoopKernel(Record):
 
         return tup_to_exprs(grid_size), tup_to_exprs(group_size)
 
+    @memoize_method
+    def local_var_names(self):
+        """Return the names of all temporary variables flagged ``is_local``,
+        as a set."""
+        temporaries = self.temporary_variables.itervalues()
+        return set(tmp.name for tmp in temporaries if tmp.is_local)
+
     def local_mem_use(self):
         return sum(lv.nbytes for lv in self.temporary_variables.itervalues()
                 if lv.is_local)
diff --git a/loopy/preprocess.py b/loopy/preprocess.py
index 990a4fc7f2cb526b73a7afa514f9e571c85c0c40..85ecca37d997a8f98abab945291acaf2f63be6eb 100644
--- a/loopy/preprocess.py
+++ b/loopy/preprocess.py
@@ -193,8 +193,7 @@ def find_accessors(kernel, readers):
 
     for insn in kernel.instructions:
         if readers:
-            from loopy.symbolic import DependencyMapper
-            var_names = DependencyMapper()(insn.expression) & admissible_vars
+            var_names = insn.get_read_var_names() & admissible_vars
         else:
             var_name = insn.get_assignee_var_name()
 
diff --git a/loopy/schedule.py b/loopy/schedule.py
index 07ce6c8cbff67ce04ca8fc06fa6545caec4932f1..d9e52dfa8aedacf003f3b3190dc57ff5dacb2297 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -17,7 +17,7 @@ class RunInstruction(Record):
     __slots__ = ["insn_id"]
 
 class Barrier(Record):
-    __slots__ = []
+    __slots__ = ["comment"]  # optional note; codegen emits it as /* ... */ after the barrier
 
 # }}}
 
@@ -43,12 +43,52 @@ def gather_schedule_subloop(schedule, start_idx):
 
 
 
-def has_dependent_in_schedule(kernel, insn_id, schedule):
-    from pytools import any
-    return any(sched_item
-            for sched_item in schedule
-            if isinstance(sched_item, RunInstruction)
-            and kernel.id_to_insn[sched_item.insn_id].insn_deps)
+
+def get_barrier_needing_dependency(kernel, target, source):
+    """Return ``(target, source, var_name)`` if *target* and *source* touch a
+    common local variable in a RAW, WAR or (for distinct instructions) WAW
+    pattern, else None. Either argument may be an Instruction or its id."""
+    from loopy.kernel import Instruction
+    if not isinstance(source, Instruction):
+        source = kernel.id_to_insn[source]
+    if not isinstance(target, Instruction):
+        target = kernel.id_to_insn[target]
+
+    local_vars = kernel.local_var_names()
+
+    tgt_write = set([target.get_assignee_var_name()]) & local_vars
+    tgt_read = target.get_read_var_names() & local_vars
+
+    src_write = set([source.get_assignee_var_name()]) & local_vars
+    src_read = source.get_read_var_names() & local_vars
+
+    waw = tgt_write & src_write
+    raw = tgt_read & src_write
+    war = tgt_write & src_read
+
+    for var_name in raw | war:  # body runs at most once: returns on the first name
+        assert source.id in target.insn_deps
+        return (target, source, var_name)
+
+    if source is target:  # an instruction yields no WAW dependency on itself
+        return None
+
+    for var_name in waw:  # body runs at most once: returns on the first name
+        assert (source.id in target.insn_deps
+                or source is target)  # NOTE(review): 2nd clause can't hold here (early return above)
+        return (target, source, var_name)
+
+    return None
+
+
+
+def get_barrier_dependent_in_schedule(kernel, source, schedule):
+    """First barrier-needing dependency on *source* in *schedule*, else None."""
+    for item in schedule:
+        if isinstance(item, RunInstruction):
+            found = get_barrier_needing_dependency(kernel, item.insn_id, source)
+            if found:
+                return found
 
 
 
@@ -289,8 +329,7 @@ def insert_barriers(kernel, schedule, level=0):
     # dependencies for which the 'normal' mechanism below will generate
     # barriers.
 
-    def issue_barrier(is_pre_barrier):
-        owed_barriers.clear()
+    def issue_barrier(is_pre_barrier, dep):
         if result and isinstance(result[-1], Barrier):
             return
 
@@ -298,8 +337,18 @@ def insert_barriers(kernel, schedule, level=0):
             if loop_had_barrier[0] or level == 0:
                 return
 
+        owed_barriers.clear()
+
+        cmt = None
+        if dep is not None:
+            target, source, var = dep
+            if is_pre_barrier:
+                cmt = "pre-barrier: %s" % var
+            else:
+                cmt = "dependency: %s" % var
+
         loop_had_barrier[0] = True
-        result.append(Barrier())
+        result.append(Barrier(comment=cmt))
 
     i = 0
     while i < len(schedule):
@@ -313,8 +362,9 @@ def insert_barriers(kernel, schedule, level=0):
             # (i.e. if anything *in* the loop depends on something beforehand)
 
             for insn_id in owed_barriers:
-                if has_dependent_in_schedule(kernel, insn_id, subloop):
-                    issue_barrier(is_pre_barrier=False)
+                dep = get_barrier_dependent_in_schedule(kernel, insn_id, subloop)
+                if dep:
+                    issue_barrier(is_pre_barrier=False, dep=dep)
                     break
 
             # }}}
@@ -326,9 +376,10 @@ def insert_barriers(kernel, schedule, level=0):
 
             if not loop_had_barrier[0]:
                 for insn_id in sub_owed_barriers:
-                    if has_dependent_in_schedule(
-                            kernel, insn_id, schedule):
-                        issue_barrier(is_pre_barrier=True)
+                    dep = get_barrier_dependent_in_schedule(
+                            kernel, insn_id, schedule)
+                    if dep:
+                        issue_barrier(is_pre_barrier=True, dep=dep)
 
             # }}}
 
@@ -347,20 +398,23 @@ def insert_barriers(kernel, schedule, level=0):
 
             # {{{ issue dependency-based barriers for this instruction
 
-            if set(insn.insn_deps) & owed_barriers:
-                issue_barrier(is_pre_barrier=False)
+            for dep_src_insn_id in set(insn.insn_deps) & owed_barriers:
+                dep = get_barrier_needing_dependency(kernel, insn, dep_src_insn_id)
+                if dep:
+                    issue_barrier(is_pre_barrier=False, dep=dep)
 
             # }}}
 
             assignee_temp_var = kernel.temporary_variables.get(
                     insn.get_assignee_var_name())
             if assignee_temp_var is not None and assignee_temp_var.is_local:
+                dep = get_barrier_dependent_in_schedule(kernel, insn.id, schedule)
+
                 if level == 0:
-                    assert has_dependent_in_schedule(
-                            kernel, insn.id, schedule)
+                    assert dep
 
-                if has_dependent_in_schedule(kernel, insn.id, schedule):
-                    issue_barrier(is_pre_barrier=True)
+                if dep:
+                    issue_barrier(is_pre_barrier=True, dep=dep)
 
                 result.append(sched_item)
                 owed_barriers.add(insn.id)