From 4b3b52039d57ff2c5c5529fc4a82ae5b2797df35 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Mon, 2 Sep 2013 15:47:23 -0500
Subject: [PATCH] Fix reduction when no_sync_size <= group_size (spotted by Alex Nitz)

---
 pyopencl/reduction.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index f342909f..489be110 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -110,7 +110,15 @@ KERNEL = """//CL//
 
         barrier(CLK_LOCAL_MEM_FENCE);
 
-        if (lid < ${no_sync_size})
+        <%
+          # NB: There's an exact duplicate of this calculation in the
+          # %while loop below.
+
+          new_size = cur_size // 2
+          assert new_size * 2 == cur_size
+        %>
+
+        if (lid < ${new_size})
         {
             __local volatile out_type *lvdata = ldata;
             % while cur_size > 1:
-- 
GitLab