diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index 00995450831b101cefca040dfc756a4261e52fd7..6eebe729f57b6f81cfab4bf8321173fe4da1bb46 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -386,10 +386,16 @@ class ReductionKernel: else: allocator = repr_vec.allocator - if sz <= stage_inf.group_size*SMALL_SEQ_COUNT*MAX_GROUP_COUNT: + if sz == 0: + result = empty(use_queue, (), self.dtype_out, allocator=allocator) + group_count = 1 + seq_count = 0 + + elif sz <= stage_inf.group_size*SMALL_SEQ_COUNT*MAX_GROUP_COUNT: total_group_size = SMALL_SEQ_COUNT*stage_inf.group_size group_count = (sz + total_group_size - 1) // total_group_size seq_count = SMALL_SEQ_COUNT + else: group_count = MAX_GROUP_COUNT macrogroup_size = group_count*stage_inf.group_size diff --git a/test/test_algorithm.py b/test/test_algorithm.py index 660c7dfc868f9ae8cf356b4199da68f385e5d8dd..353af28173c1dadec40e8c761e4b8df8bd45caaf 100644 --- a/test/test_algorithm.py +++ b/test/test_algorithm.py @@ -247,15 +247,21 @@ def test_sum(ctx_factory): slice(1000, -3000), slice(1000, None), slice(1000, None, 3), + slice(1000, 1000), ]: sum_a = np.sum(a[slc]) + if sum_a: + ref_divisor = abs(sum_a) + else: + ref_divisor = 1 + if slc.step is None: sum_a_gpu = cl_array.sum(a_gpu[slc]).get() - assert abs(sum_a_gpu - sum_a) / abs(sum_a) < 1e-4 + assert abs(sum_a_gpu - sum_a) / ref_divisor < 1e-4 sum_a_gpu_2 = cl_array.sum(a_gpu, slice=slc).get() - assert abs(sum_a_gpu_2 - sum_a) / abs(sum_a) < 1e-4 + assert abs(sum_a_gpu_2 - sum_a) / ref_divisor < 1e-4 def test_sum_without_data(ctx_factory):