From 7f29d20573af85eac723944b426adf734ce96710 Mon Sep 17 00:00:00 2001
From: Andreas Kloeckner <inform@tiker.net>
Date: Sat, 8 Jun 2013 14:51:47 -0400
Subject: [PATCH] Add workarounds for
 http://llvm.org/bugs/show_bug.cgi?id=16149

---
 pyopencl/clrandom.py    | 19 +++++++++++--------
 pyopencl/elementwise.py | 18 ++++++++++++------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py
index 04ebe99e..bae170f5 100644
--- a/pyopencl/clrandom.py
+++ b/pyopencl/clrandom.py
@@ -187,7 +187,7 @@ class RanluxGenerator(object):
         return "\n".join(lines)
 
     @memoize_method
-    def get_gen_kernel(self, dtype, flavor=""):
+    def get_gen_kernel(self, dtype, distribution="uniform"):
         size_multiplier = 1
         arg_dtype = dtype
 
@@ -212,16 +212,19 @@ class RanluxGenerator(object):
             size_multiplier = 4
             arg_dtype = np.float32
         elif dtype == np.int32:
-            assert flavor == ""
+            assert distribution == "uniform"
             bits = 32
             c_type = "int"
             rng_expr = ("(shift "
-                    "+ convert_int4(scale * gen) "
-                    "+ convert_int4((scale / (1<<24)) * gen))")
+                    "+ convert_int4((float) scale * gen) "
+                    "+ convert_int4((float) (scale / (1<<24)) * gen))")
         else:
             raise TypeError("unsupported RNG data type '%s'" % dtype)
 
-        rl_flavor = "%d%s" % (bits, flavor)
+        rl_flavor = "%d%s" % (bits, {
+                "uniform": "",
+                "normal": "norm"
+                }[distribution])
 
         src = """//CL//
             %(defines)s
@@ -231,7 +234,7 @@ class RanluxGenerator(object):
             typedef %(output_t)s output_t;
             typedef %(output_t)s4 output_vec_t;
             #define NUM_WORKITEMS %(num_work_items)d
-            #define RANLUX_FUNC ranluxcl##%(rlflavor)s
+            #define RANLUX_FUNC ranluxcl%(rlflavor)s
             #define GET_RANDOM_NUM(gen) %(rng_expr)s
 
             kernel void generate(
@@ -290,7 +293,7 @@ class RanluxGenerator(object):
         if queue is None:
             queue = ary.queue
 
-        knl, size_multiplier = self.get_gen_kernel(ary.dtype, "")
+        knl, size_multiplier = self.get_gen_kernel(ary.dtype, "uniform")
         knl(queue,
                 (self.num_work_items,), None,
                 self.state.data, ary.data, ary.size*size_multiplier,
@@ -315,7 +318,7 @@ class RanluxGenerator(object):
         if queue is None:
             queue = ary.queue
 
-        knl, size_multiplier = self.get_gen_kernel(ary.dtype, "norm")
+        knl, size_multiplier = self.get_gen_kernel(ary.dtype, "normal")
         knl(queue,
                 (self.num_work_items,), self.wg_size,
                 self.state.data, ary.data, ary.size*size_multiplier, sigma, mu)
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 319ffb6b..3e63f4e0 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -721,12 +721,18 @@ def get_reverse_kernel(context, dtype):
 
 @context_dependent_memoize
 def get_arange_kernel(context, dtype):
-    return get_elwise_kernel(context,
-            "%(tp)s *z, %(tp)s start, %(tp)s step" % {
-                "tp": dtype_to_ctype(dtype),
-                },
-            "z[i] = start + i*step",
-            name="arange")
+    if dtype.kind == "c":
+        i = "%s_fromreal(i)" % complex_dtype_to_name(dtype)
+    else:
+        i = "(%s) i" % dtype_to_ctype(dtype)
+
+    return get_elwise_kernel(context, [
+        VectorArg(dtype, "z", with_offset=True),
+        ScalarArg(dtype, "start"),
+        ScalarArg(dtype, "step"),
+        ],
+        "z[i] = start + %s*step" % i,
+        name="arange")
 
 
 @context_dependent_memoize
-- 
GitLab