From 7f29d20573af85eac723944b426adf734ce96710 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner <inform@tiker.net> Date: Sat, 8 Jun 2013 14:51:47 -0400 Subject: [PATCH] Add workarounds for http://llvm.org/bugs/show_bug.cgi?id=16149 --- pyopencl/clrandom.py | 19 +++++++++++-------- pyopencl/elementwise.py | 18 ++++++++++++------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index 04ebe99e..bae170f5 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -187,7 +187,7 @@ class RanluxGenerator(object): return "\n".join(lines) @memoize_method - def get_gen_kernel(self, dtype, flavor=""): + def get_gen_kernel(self, dtype, distribution="uniform"): size_multiplier = 1 arg_dtype = dtype @@ -212,16 +212,19 @@ class RanluxGenerator(object): size_multiplier = 4 arg_dtype = np.float32 elif dtype == np.int32: - assert flavor == "" + assert distribution == "uniform" bits = 32 c_type = "int" rng_expr = ("(shift " - "+ convert_int4(scale * gen) " - "+ convert_int4((scale / (1<<24)) * gen))") + "+ convert_int4((float) scale * gen) " + "+ convert_int4((float) (scale / (1<<24)) * gen))") else: raise TypeError("unsupported RNG data type '%s'" % dtype) - rl_flavor = "%d%s" % (bits, flavor) + rl_flavor = "%d%s" % (bits, { + "uniform": "", + "normal": "norm" + }[distribution]) src = """//CL// %(defines)s @@ -231,7 +234,7 @@ class RanluxGenerator(object): typedef %(output_t)s output_t; typedef %(output_t)s4 output_vec_t; #define NUM_WORKITEMS %(num_work_items)d - #define RANLUX_FUNC ranluxcl##%(rlflavor)s + #define RANLUX_FUNC ranluxcl%(rlflavor)s #define GET_RANDOM_NUM(gen) %(rng_expr)s kernel void generate( @@ -290,7 +293,7 @@ class RanluxGenerator(object): if queue is None: queue = ary.queue - knl, size_multiplier = self.get_gen_kernel(ary.dtype, "") + knl, size_multiplier = self.get_gen_kernel(ary.dtype, "uniform") knl(queue, (self.num_work_items,), None, self.state.data, ary.data, ary.size*size_multiplier, @@ -315,7 +318,7 @@ class RanluxGenerator(object): if queue is None: queue = ary.queue - knl, size_multiplier = self.get_gen_kernel(ary.dtype, "norm") + knl, size_multiplier = self.get_gen_kernel(ary.dtype, "normal") knl(queue, (self.num_work_items,), self.wg_size, self.state.data, ary.data, ary.size*size_multiplier, sigma, mu) diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index 319ffb6b..3e63f4e0 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -721,12 +721,18 @@ def get_reverse_kernel(context, dtype): @context_dependent_memoize def get_arange_kernel(context, dtype): - return get_elwise_kernel(context, - "%(tp)s *z, %(tp)s start, %(tp)s step" % { - "tp": dtype_to_ctype(dtype), - }, - "z[i] = start + i*step", - name="arange") + if dtype.kind == "c": + i = "%s_fromreal(i)" % complex_dtype_to_name(dtype) + else: + i = "(%s) i" % dtype_to_ctype(dtype) + + return get_elwise_kernel(context, [ + VectorArg(dtype, "z", with_offset=True), + ScalarArg(dtype, "start"), + ScalarArg(dtype, "step"), + ], + "z[i] = start + %s*step" % i, + name="arange") @context_dependent_memoize -- GitLab