From ea8168e57baf2256dffc99d35f65e3ede27ca782 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 19 Nov 2011 20:01:46 -0500
Subject: [PATCH] Work around failure in struct reduce on Apple Lion GPU.

---
 pyopencl/reduction.py |  1 -
 test/test_array.py    | 40 +++++++++++++++++++++++++---------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py
index 082555cc..fbe01f75 100644
--- a/pyopencl/reduction.py
+++ b/pyopencl/reduction.py
@@ -290,7 +290,6 @@ class ReductionKernel:
             trip_count += 1
             assert trip_count <= 2
 
-        # stage 2 has only one input and no map expression
         self.stage_2_inf = get_reduction_kernel(2, ctx,
                 dtype_to_ctype(dtype_out), dtype_out.itemsize,
                 neutral, reduce_expr, arguments=arguments,
diff --git a/test/test_array.py b/test/test_array.py
index 8dd2fab7..504d1f4a 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -659,7 +659,11 @@ def test_view(ctx_factory):
     view = a_dev.view(np.int16)
     assert view.shape == (8, 32) and view.dtype == np.int16
 
-mmc_dtype = np.dtype([("cur_min", np.float32), ("cur_max", np.float32)])
+mmc_dtype = np.dtype([
+    ("cur_min", np.int32),
+    ("cur_max", np.int32),
+    ("pad", np.int32),
+    ])
 
 from pyopencl.tools import register_dtype
 register_dtype(mmc_dtype, "minmax_collector")
@@ -669,11 +673,13 @@ def test_struct_reduce(ctx_factory):
     context = ctx_factory()
     queue = cl.CommandQueue(context)
 
-    preamble = """//CL//
+    preamble = r"""//CL//
     struct minmax_collector
     {
-        float cur_min;
-        float cur_max;
+        int cur_min;
+        int cur_max;
+        // Workaround for OS X Lion GPU CL. Mystifying.
+        int pad;
     };
 
     typedef struct minmax_collector minmax_collector;
@@ -681,22 +687,27 @@ def test_struct_reduce(ctx_factory):
     minmax_collector mmc_neutral()
     {
         // FIXME: needs infinity literal in real use, ok here
-        minmax_collector result = {10000, -10000};
+        minmax_collector result;
+        result.cur_min = 1<<30;
+        result.cur_max = -(1<<30);
         return result;
     }
 
     minmax_collector mmc_from_scalar(float x)
     {
-        minmax_collector result = {x, x};
+        minmax_collector result;
+        result.cur_min = x;
+        result.cur_max = x;
         return result;
     }
 
     minmax_collector agg_mmc(minmax_collector a, minmax_collector b)
     {
-        minmax_collector result = {
-            fmin(a.cur_min, b.cur_min),
-            fmax(a.cur_max, b.cur_max),
-            };
+        minmax_collector result = a;
+        if (b.cur_min < result.cur_min)
+            result.cur_min = b.cur_min;
+        if (b.cur_max > result.cur_max)
+            result.cur_max = b.cur_max;
         return result;
     }
 
@@ -704,22 +715,21 @@ def test_struct_reduce(ctx_factory):
 
 
     from pyopencl.clrandom import rand as clrand
-    a_gpu = clrand(queue, (20000,), dtype=np.float32)
+    a_gpu = clrand(queue, (20000,), dtype=np.int32, a=0, b=10**6)
     a = a_gpu.get()
 
-
     from pyopencl.reduction import ReductionKernel
     red = ReductionKernel(context, mmc_dtype,
             neutral="mmc_neutral()",
             reduce_expr="agg_mmc(a, b)", map_expr="mmc_from_scalar(x[i])",
-            arguments="__global float *x", preamble=preamble)
+            arguments="__global int *x", preamble=preamble)
 
     minmax = red(a_gpu).get()
     #print minmax["cur_min"], minmax["cur_max"]
     #print np.min(a), np.max(a)
 
-    assert minmax["cur_min"] == np.min(a)
-    assert minmax["cur_max"] == np.max(a)
+    assert abs(minmax["cur_min"] - np.min(a)) < 1e-5
+    assert abs(minmax["cur_max"] - np.max(a)) < 1e-5
 
 
 
-- 
GitLab