From 2b98b07bcff88a5b2dc6f0fce8d5c989d90cc72a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Wed, 15 Jul 2015 16:37:46 -0500 Subject: [PATCH] Adapt work group size based on available lmem --- pyopencl/bitonic_sort.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/pyopencl/bitonic_sort.py b/pyopencl/bitonic_sort.py index a6e9b5a1..8e2b4045 100644 --- a/pyopencl/bitonic_sort.py +++ b/pyopencl/bitonic_sort.py @@ -176,7 +176,31 @@ class BitonicSort(object): allowb8 = True allowb16 = True - wg = min(ds, self.context.devices[0].max_work_group_size) + dev = self.context.devices[0] + + # {{{ find workgroup size + + wg = min(ds, dev.max_work_group_size) + + available_lmem = dev.local_mem_size + while True: + lmem_size = wg*4*key_dtype.itemsize + if argsort: + lmem_size += wg*4*idx_dtype.itemsize + + if lmem_size + 512 > available_lmem: + wg //= 2 + + if not wg: + raise RuntimeError( + "too little local memory available on '%s'" + % dev) + + else: + break + + # }}} + length = wg >> 1 prg = self.get_program( 'BLO', argsort, (1, 1, key_ctype, idx_ctype, ds, ns)) -- GitLab