From 30e44717df27c0ccea4ba470576b5720f16d5ba1 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 7 Nov 2011 11:44:03 -0500
Subject: [PATCH] Barrier Insertion: Be less strict in dep checking when
 checking for pre-barriers.

---
 loopy/schedule.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/loopy/schedule.py b/loopy/schedule.py
index bd5151da9..af7b35f09 100644
--- a/loopy/schedule.py
+++ b/loopy/schedule.py
@@ -44,7 +44,7 @@ def gather_schedule_subloop(schedule, start_idx):
 
 
 
-def get_barrier_needing_dependency(kernel, target, source):
+def get_barrier_needing_dependency(kernel, target, source, unordered=False):
     from loopy.kernel import Instruction
     if not isinstance(source, Instruction):
         source = kernel.id_to_insn[source]
@@ -64,7 +64,8 @@ def get_barrier_needing_dependency(kernel, target, source):
     war = tgt_write & src_read
 
     for var_name in raw | war:
-        assert source.id in target.insn_deps
+        if not unordered:
+            assert source.id in target.insn_deps
         return (target, source, var_name)
 
     if source is target:
@@ -83,10 +84,18 @@ def get_barrier_needing_dependency(kernel, target, source):
 
 
 def get_barrier_dependent_in_schedule(kernel, source, schedule):
+    """
+    :arg source: an instruction id for the source of the dependency
+    """
+    unordered = False
+    for sched_item in schedule:
+        if isinstance(sched_item, RunInstruction) and sched_item.insn_id == source:
+            unordered = True
+
     for sched_item in schedule:
         if isinstance(sched_item, RunInstruction):
             temp_res = get_barrier_needing_dependency(
-                    kernel, sched_item.insn_id, source)
+                    kernel, sched_item.insn_id, source, unordered=unordered)
             if temp_res:
                 return temp_res
 
-- 
GitLab