From 2b98b07bcff88a5b2dc6f0fce8d5c989d90cc72a Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Wed, 15 Jul 2015 16:37:46 -0500
Subject: [PATCH] Adapt work group size based on available lmem

---
 pyopencl/bitonic_sort.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/pyopencl/bitonic_sort.py b/pyopencl/bitonic_sort.py
index a6e9b5a1..8e2b4045 100644
--- a/pyopencl/bitonic_sort.py
+++ b/pyopencl/bitonic_sort.py
@@ -176,7 +176,31 @@ class BitonicSort(object):
         allowb8 = True
         allowb16 = True
 
-        wg = min(ds, self.context.devices[0].max_work_group_size)
+        dev = self.context.devices[0]
+
+        # {{{ find workgroup size
+
+        wg = min(ds, dev.max_work_group_size)
+
+        available_lmem = dev.local_mem_size
+        while True:
+            lmem_size = wg*4*key_dtype.itemsize
+            if argsort:
+                lmem_size += wg*4*idx_dtype.itemsize
+
+            if lmem_size + 512 > available_lmem:
+                wg //= 2
+
+                if not wg:
+                    raise RuntimeError(
+                        "too little local memory available on '%s'"
+                        % dev)
+
+            else:
+                break
+
+        # }}}
+
         length = wg >> 1
         prg = self.get_program(
                 'BLO', argsort, (1, 1, key_ctype, idx_ctype, ds, ns))
-- 
GitLab