From 21ba3f485f048c40c3eecf51db313eec79326632 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 11 Aug 2012 16:26:04 -0400
Subject: [PATCH] Be smarter about determining reduction group size.

---
 pyopencl/reduction.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index 9d404a79..68562b11 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -158,10 +158,15 @@ def get_reduction_source(
         max_work_group_size = device.max_work_group_size
         if "RV770" in device.name:
             max_work_group_size = 64
-        return min(
-                max_work_group_size,
-                (device.local_mem_size + out_type_size - 1)
-                // out_type_size)
+
+        # compute lmem limit
+        from pytools import div_ceil
+        lmem_wg_size = div_ceil(max_work_group_size, out_type_size)
+        result = min(max_work_group_size, lmem_wg_size)
+
+        # round down to power of 2
+        from pyopencl.tools import bitlog2
+        return 2**bitlog2(result)
 
     group_size = min(get_dev_group_size(dev) for dev in devices)
 
-- 
GitLab