From 942e7d611f3c991ce785b5224329a10614644af9 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Fri, 24 Aug 2012 00:29:55 -0400
Subject: [PATCH] Fix slab decomposition for hw-parallel loops.

---
 loopy/codegen/loop.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py
index 70ac54787..faeefa025 100644
--- a/loopy/codegen/loop.py
+++ b/loopy/codegen/loop.py
@@ -155,6 +155,14 @@ def generate_unroll_loop(kernel, sched_index, codegen_state):
 
 # }}}
 
+def intersect_kernel_with_slab(kernel, slab, iname):  # returns kernel.copy(...) with one domain replaced
+    hdi = kernel.get_home_domain_index(iname)  # index into kernel.domains of iname's home domain
+    home_domain = kernel.domains[hdi]
+    new_domains = kernel.domains[:]  # slice copy, so the assignment below does not touch kernel.domains
+    new_domains[hdi] = home_domain & isl.align_spaces(slab, home_domain)  # slab aligned to home_domain first; `&` presumably isl intersection -- confirm
+    return kernel.copy(domains=new_domains)
+
+
 # {{{ hw-parallel loop
 
 def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=None):
@@ -223,9 +231,12 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=
         if len(slabs) == 1:
             cmt = None
 
-        new_codegen_state = codegen_state.intersect(slab)
+        # Have the conditional infrastructure generate the
+        # slabbing conditionals.
+        slabbed_kernel = intersect_kernel_with_slab(kernel, slab, iname)
+
         inner = set_up_hw_parallel_loops(
-                kernel, sched_index, new_codegen_state, hw_inames_left)
+                slabbed_kernel, sched_index, codegen_state, hw_inames_left)
         result.append(add_comment(cmt, inner))
 
     from loopy.codegen import gen_code_block
@@ -249,7 +260,9 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state):
         if len(slabs) == 1:
             cmt = None
 
+        # Conditionals for slab are generated below.
         new_codegen_state = codegen_state.intersect(slab)
+
         inner = build_loop_nest(kernel, sched_index+1,
                 new_codegen_state)
 
-- 
GitLab