From 21ba3f485f048c40c3eecf51db313eec79326632 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 11 Aug 2012 16:26:04 -0400
Subject: [PATCH] Be smarter about determining reduction group size.

---
 pyopencl/reduction.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index 9d404a79..68562b11 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -158,10 +158,15 @@ def  get_reduction_source(
         max_work_group_size = device.max_work_group_size
         if "RV770" in device.name:
             max_work_group_size = 64
-        return min(
-                max_work_group_size,
-                (device.local_mem_size + out_type_size - 1)
-                // out_type_size)
+
+        # lmem limit -- NOTE(review): device.local_mem_size is no longer read
+        from pytools import div_ceil
+        lmem_wg_size = div_ceil(max_work_group_size, out_type_size)
+        result = min(max_work_group_size, lmem_wg_size)
+
+        # round down to power of 2
+        from pyopencl.tools import bitlog2
+        return 2**bitlog2(result)
 
     group_size = min(get_dev_group_size(dev) for dev in devices)
 
-- 
GitLab