From 942e7d611f3c991ce785b5224329a10614644af9 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Fri, 24 Aug 2012 00:29:55 -0400 Subject: [PATCH] Fix slab decomposition for hw-parallel loops. --- loopy/codegen/loop.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/loopy/codegen/loop.py b/loopy/codegen/loop.py index 70ac54787..faeefa025 100644 --- a/loopy/codegen/loop.py +++ b/loopy/codegen/loop.py @@ -155,6 +155,14 @@ def generate_unroll_loop(kernel, sched_index, codegen_state): # }}} +def intersect_kernel_with_slab(kernel, slab, iname): + hdi = kernel.get_home_domain_index(iname) + home_domain = kernel.domains[hdi] + new_domains = kernel.domains[:] + new_domains[hdi] = home_domain & isl.align_spaces(slab, home_domain) + return kernel.copy(domains=new_domains) + + # {{{ hw-parallel loop def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left=None): @@ -223,9 +231,12 @@ def set_up_hw_parallel_loops(kernel, sched_index, codegen_state, hw_inames_left= if len(slabs) == 1: cmt = None - new_codegen_state = codegen_state.intersect(slab) + # Have the conditional infrastructure generate the + # slabbing conditionals. + slabbed_kernel = intersect_kernel_with_slab(kernel, slab, iname) + inner = set_up_hw_parallel_loops( - kernel, sched_index, new_codegen_state, hw_inames_left) + slabbed_kernel, sched_index, codegen_state, hw_inames_left) result.append(add_comment(cmt, inner)) from loopy.codegen import gen_code_block @@ -249,7 +260,9 @@ def generate_sequential_loop_dim_code(kernel, sched_index, codegen_state): if len(slabs) == 1: cmt = None + # Conditionals for slab are generated below. new_codegen_state = codegen_state.intersect(slab) + inner = build_loop_nest(kernel, sched_index+1, new_codegen_state) -- GitLab