diff --git a/pyopencl/array.py b/pyopencl/array.py index fcedc4279f144f48b5eee43fe081274d6b4c2b4c..a84a8ebfa2dede7ab25044f8087073f81c601a03 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -34,7 +34,6 @@ import numpy import pyopencl.elementwise as elementwise import pyopencl as cl #from pytools import memoize_method -from decorator import decorator @@ -74,34 +73,35 @@ def _get_common_dtype(obj1, obj2): -@decorator -def elwise_kernel_runner(kernel_getter, *args, **kwargs): +def elwise_kernel_runner(kernel_getter): """Take a kernel getter of the same signature as the kernel and return a function that invokes that kernel. Assumes that the zeroth entry in *args* is an :class:`Array`. """ + #(Note that the 'return a function' bit is done by @decorator.) - # The decorators module converts kwargs to positional arguments, - # so we pop the queue argument first - args = list(args) - repr_ary = args[0] - queue = args.pop() or repr_ary.queue + def kernel_runner(*args, **kwargs): + repr_ary = args[0] + queue = kwargs.pop("queue", None) or repr_ary.queue - gs, ls = repr_ary.get_sizes(queue) - knl = kernel_getter(*args) + gs, ls = repr_ary.get_sizes(queue) + knl = kernel_getter(*args) - assert isinstance(repr_ary, Array) + assert isinstance(repr_ary, Array) - actual_args = [] - for arg in args: - if isinstance(arg, Array): - actual_args.append(arg.data) - else: - actual_args.append(arg) - actual_args.append(repr_ary.mem_size) + actual_args = [] + for arg in args: + if isinstance(arg, Array): + actual_args.append(arg.data) + else: + actual_args.append(arg) + actual_args.append(repr_ary.mem_size) + + return knl(queue, gs, ls, *actual_args) - return knl(queue, gs, ls, *actual_args) + from functools import update_wrapper + return update_wrapper(kernel_runner, kernel_getter) @@ -167,7 +167,7 @@ class Array(object): #@memoize_method FIXME: reenable def get_sizes(self, queue): - return splay(self.queue, self.mem_size) + return splay(queue, self.mem_size) def set(self, ary, queue=None, async=False): assert ary.size == self.size @@ -221,22 +221,19 @@ class Array(object): """Compute ``out = afac * a + other``, where `other` is a scalar.""" return elementwise.get_axpbz_kernel(out.context, out.dtype) + @staticmethod @elwise_kernel_runner - def _elwise_multiply(self, out, other, queue=None): + def _elwise_multiply(out, a, b, queue=None): return elementwise.get_multiply_kernel( - self.context, self.dtype, other.dtype, out.dtype) + a.context, a.dtype, b.dtype, out.dtype) + @staticmethod @elwise_kernel_runner - def _rdiv_scalar(self, out, other, queue=None): - """Divides an array by a scalar:: - - y = n / self - """ - - assert self.dtype == numpy.float32 - - return elementwise.get_rdivide_elwise_kernel(self.context, self.dtype) + def _rdiv_scalar(out, ary, other, queue=None): + return elementwise.get_rdivide_elwise_kernel( + out.context, ary.dtype) + @staticmethod @elwise_kernel_runner def _div(self, out, other, queue=None): """Divides an array by another array.""" @@ -246,32 +243,59 @@ class Array(object): return elementwise.get_divide_kernel(self.context, self.dtype, other.dtype, out.dtype) + @staticmethod @elwise_kernel_runner def _fill(result, scalar): return elementwise.get_fill_kernel(result.context, result.dtype) + @staticmethod @elwise_kernel_runner def _abs(result, arg): + if arg.dtype.kind == "f": + fname = "fabs" + elif arg.dtype.kind in ["u", "i"]: + fname = "abs" + else: + raise TypeError("unsupported dtype in _abs()") + return elementwise.get_unary_func_kernel( arg.context, fname, arg.dtype) + @staticmethod + @elwise_kernel_runner + def _pow_scalar(result, ary, exponent): + return elementwise.get_pow_kernel(result.context, result.dtype) + + @staticmethod + @elwise_kernel_runner + def _pow_array(result, base, exponent): + return elementwise.get_pow_array_kernel( + result.context, base.dtype, exponent.dtype, result.dtype) + + @staticmethod + @elwise_kernel_runner + def _reverse(result, ary): + return elementwise.get_reverse_kernel(result.context, ary.dtype) + + @staticmethod @elwise_kernel_runner - def _pow_scalar(self, ): - return elementwise.get_pow_kernel(self.context, self.dtype) + def _copy(dest, src): + return elementwise.get_copy_kernel( + dest.context, dest.dtype, src.dtype) - def _new_like_me(self, dtype=None): + def _new_like_me(self, dtype=None, queue=None): if dtype is None: dtype = self.dtype return self.__class__(self.context, self.shape, dtype, allocator=self.allocator, - queue=self.queue) + queue=queue or self.queue) # operators --------------------------------------------------------------- def mul_add(self, selffac, other, otherfac, queue=None): """Return `selffac * self + otherfac*other`. """ result = self._new_like_me(_get_common_dtype(self, other)) - self._axpbyz(result, selffac, self, other, otherfac) + self._axpbyz(result, selffac, self, otherfac, other) return result def __add__(self, other): @@ -301,12 +325,13 @@ class Array(object): self._axpbyz(result, 1, self, -1, other) return result else: + # subtract a scalar if other == 0: return self else: - # create a new array for the result result = self._new_like_me() - return self._axpbz(result, 1, self, -other) + self._axpbz(result, 1, self, -other) + return result def __rsub__(self,other): """Substracts an array by a scalar or an array:: @@ -315,23 +340,26 @@ class Array(object): """ # other must be a scalar result = self._new_like_me() - return self._axpbz(result, -1, self, other) + self._axpbz(result, -1, self, other) + return result def __iadd__(self, other): self._axpbyz(self, 1, self, 1, other) return self def __isub__(self, other): - return self._axpbyz(self, 1, self, -1, other) + self._axpbyz(self, 1, self, -1, other) + return self def __neg__(self): result = self._new_like_me() - return self._axpbz(result, -1, self, 0) + self._axpbz(result, -1, self, 0) + return result def __mul__(self, other): if isinstance(other, Array): result = self._new_like_me(_get_common_dtype(self, other)) - self._elwise_multiply(result, other) + self._elwise_multiply(result, self, other) return result else: result = self._new_like_me() @@ -354,7 +382,7 @@ class Array(object): """ if isinstance(other, Array): result = self._new_like_me(_get_common_dtype(self, other)) - return self._div(result, other) + self._div(result, self, other) else: if other == 1: return self @@ -362,7 +390,8 @@ class Array(object): # create a new array for the result result = self._new_like_me() self._axpbz(result, 1/other, self, 0) - return result + + return result __truediv__ = __div__ @@ -381,7 +410,7 @@ class Array(object): else: # create a new array for the result result = self._new_like_me() - self._rdiv_scalar(result, other) + self._rdiv_scalar(result, self, other) return result @@ -404,13 +433,6 @@ class Array(object): of `self`. """ - if self.dtype.kind == "f": - fname = "fabs" - elif self.dtype.kind in ["u", "i"]: - fname = "abs" - else: - raise TypeError("unsupported dtype in __abs__()") - result = self._new_like_me() self._abs(result, self) return result @@ -424,22 +446,12 @@ class Array(object): assert self.shape == other.shape result = self._new_like_me(_get_common_dtype(self, other)) - - knl = elementwise.get_pow_array_kernel(self.context, - self.dtype, other.dtype, result.dtype) - - knl(self.queue, self._global_size, self._local_size, - self.data, other.data, result.data, - self.mem_size) - - return result + self._pow_array(result, self, other) else: result = self._new_like_me() - knl(self.queue, self._global_size, self._local_size, - other, self.data, result.data, - self.mem_size) + self._pow_scalar(result, self, other) - return result + return result def reverse(self, queue=None): """Return this array in reversed order. The array is treated @@ -447,11 +459,7 @@ class Array(object): """ result = self._new_like_me() - - knl = elementwise.get_reverse_kernel(self.context, self.dtype) - knl(queue or self.queue, self._global_size, self._local_size, - self.data, result.data, self.mem_size) - + self._reverse(result, self) return result def astype(self, dtype, queue=None): @@ -459,14 +467,9 @@ class Array(object): return self result = self._new_like_me(dtype=dtype) - - knl = elementwise.get_copy_kernel(self.context, dtype, self.dtype) - knl(queue or self.queue, self._global_size, self._local_size, - result.data, self.data, self.mem_size) - + self._copy(result, self, queue=queue) return result - # slicing ----------------------------------------------------------------- def __getitem__(self, idx): if idx == (): @@ -541,6 +544,11 @@ def zeros_like(ary): return result +@elwise_kernel_runner +def _arange(result, start, step): + return elementwise.get_arange_kernel( + result.context, result.dtype) + def arange(context, queue, *args, **kwargs): """Create an array filled with numbers spaced `step` apart, starting from `start` and ending at `stop`. @@ -617,30 +625,27 @@ def arange(context, queue, *args, **kwargs): size = int(ceil((stop-start)/step)) result = Array(context, (size,), dtype, queue=queue) + _arange(result, start, step, queue=queue) + return result - knl = elementwise.get_arange_kernel(context, dtype) - knl(queue, result._global_size, result._local_size, - result.data, start, step, size) - return result +@elwise_kernel_runner +def _take(result, ary, indices): + return elementwise.get_take_kernel( + result.context, result.dtype, indices.dtype) -def take(a, indices, out=None, queue=None): - queue = queue or a.queue + +def take(a, indices, out=None, queue=None): if out is None: out = Array(a.context, indices.shape, a.dtype, a.allocator, - queue=queue) + queue=queue or a.queue) assert len(indices.shape) == 1 - - knl = elementwise.get_take_kernel( - a.context, a.dtype, indices.dtype) - knl(queue, a._global_size, a._local_size, - indices.data, a.data, out.data, indices.size) - + _take(out, a, indices, queue=queue) return out @@ -684,10 +689,11 @@ def multi_take(arrays, indices, out=None, queue=None): if start_i + chunk_size > vec_count: knl = make_func_for_chunk_size(vec_count-start_i) - knl(queue, indices._global_size, indices._local_size, + gs, ls = indices.get_sizes(queue) + knl(queue, gs, ls, indices.data, - *([i.data for i in arrays[chunk_slice]] - + [o.data for o in out[chunk_slice]] + *([o.data for o in out[chunk_slice]] + + [i.data for i in arrays[chunk_slice]] + [indices.size])) return out @@ -751,10 +757,11 @@ def multi_take_put(arrays, dest_indices, src_indices, dest_shape=None, if start_i + chunk_size > vec_count: knl = make_func_for_chunk_size(vec_count-start_i) - knl(queue, src_indices._global_size, src_indices._local_size, - dest_indices.data, src_indices.data, - *([i.data for i in arrays[chunk_slice]] - + [o.data for o in out[chunk_slice]] + gs, ls = src_indices.get_sizes(queue) + knl(queue, gs, ls, + *([o.data for o in out[chunk_slice]] + + [dest_indices.data, src_indices.data] + + [i.data for i in arrays[chunk_slice]] + src_offsets_list[chunk_slice] + [src_indices.size])) @@ -803,9 +810,10 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None): if start_i + chunk_size > vec_count: knl = make_func_for_chunk_size(vec_count-start_i) - knl(queue, dest_indices._global_size, dest_indices._local_size, - dest_indices.data, + gs, ls = dest_indices.get_sizes(queue) + knl(queue, gs, ls, *([o.data for o in out[chunk_slice]] + + [dest_indices.data] + [i.data for i in arrays[chunk_slice]] + [dest_indices.size])) @@ -814,6 +822,14 @@ def multi_put(arrays, dest_indices, dest_shape=None, out=None, queue=None): +@elwise_kernel_runner +def _if_positive(result, criterion, then_, else_): + return elementwise.get_if_positive_kernel( + result.context, criterion.dtype, then_.dtype) + + + + def if_positive(criterion, then_, else_, out=None, queue=None): if not (criterion.shape == then_.shape == else_.shape): raise ValueError("shapes do not match") @@ -827,11 +843,7 @@ def if_positive(criterion, then_, else_, out=None, queue=None): if out is None: out = empty_like(then_) - - knl(queue or criterion.queue, criterion._global_size, criterion._local_size, - criterion.data, then_.data, else_.data, out.data, - criterion.size) - + _if_positive(out, criterion, then_, else_) return out diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py index 9500486c01003a6b420b2cdae8facf21bac5ca3d..46688d41e7c9309df23748d605408edb1c7a6626 100644 --- a/pyopencl/clmath.py +++ b/pyopencl/clmath.py @@ -2,14 +2,16 @@ import pyopencl.array as cl_array import pyopencl.elementwise as elementwise def _make_unary_array_func(name): - def f(array, queue=None): - result = array._new_like_me() - - knl = elementwise.get_unary_func_kernel(array.context, name, array.dtype) - knl(queue or array.queue, array._global_size, array._local_size, - array.data, result.data, array.mem_size) + @cl_array.elwise_kernel_runner + def knl_runner(result, arg): + return elementwise.get_unary_func_kernel( + result.context, name, arg.dtype) + def f(array, queue=None): + result = array._new_like_me(queue=queue) + knl_runner(result, array, queue=queue) return result + return f # See table 6.8 in the CL spec @@ -49,49 +51,47 @@ floor = _make_unary_array_func("floor") # TODO: fmax # TODO: fmin +@cl_array.elwise_kernel_runner +def _fmod(result, arg, mod): + return elementwise.get_fmod_kernel(result.context) + def fmod(arg, mod, queue=None): """Return the floating point remainder of the division `arg/mod`, for each element in `arg` and `mod`.""" - result = arg._new_like_me() - - knl = elementwise.get_fmod_kernel(arg.context) - knl(queue or arg.queue, arg._global_size, arg._local_size, - arg.data, mod.data, result.data, arg.mem_size) - + result = arg._new_like_me(queue=queue) + _fmod(result, arg, mod, queue=queue) return result # TODO: fract +@cl_array.elwise_kernel_runner +def _frexp(sig, expt, arg): + return elementwise.get_frexp_kernel(sig.context) + def frexp(arg, queue=None): """Return a tuple `(significands, exponents)` such that `arg == significand * 2**exponent`. """ - sig = arg._new_like_me() - expt = arg._new_like_me() - - knl = elementwise.get_frexp_kernel(arg.context) - knl(queue or arg.queue, arg._global_size, arg._local_size, - arg.data, sig.data, expt.data, arg.mem_size) - + sig = arg._new_like_me(queue=queue) + expt = arg._new_like_me(queue=queue) + _frexp(sig, expt, arg, queue=queue) return sig, expt # TODO: hypot ilogb = _make_unary_array_func("ilogb") +@cl_array.elwise_kernel_runner +def _ldexp(result, sig, exp): + return elementwise.get_ldexp_kernel(result.context) + def ldexp(significand, exponent, queue=None): """Return a new array of floating point values composed from the entries of `significand` and `exponent`, paired together as `result = significand * 2**exponent`. """ - result = significand._new_like_me() - - knl = elementwise.get_ldexp_kernel(significand.context) - knl(queue or significand.queue, - significand._global_size, significand._local_size, - significand.data, exponent.data, result.data, - significand.mem_size) - + result = significand._new_like_me(queue=queue) + _ldexp(result, significand, exponent) return result lgamma = _make_unary_array_func("lgamma") @@ -107,19 +107,18 @@ logb = _make_unary_array_func("logb") # TODO: maxmag # TODO: minmag +@cl_array.elwise_kernel_runner +def _modf(intpart, fracpart, arg): + return elementwise.get_modf_kernel(intpart.context) + def modf(arg, queue=None): """Return a tuple `(fracpart, intpart)` of arrays containing the integer and fractional parts of `arg`. """ - intpart = arg._new_like_me() - fracpart = arg._new_like_me() - - knl = elementwise.get_modf_kernel(arg.context) - knl(queue or arg.queue, arg._global_size, arg._local_size, - arg.data, intpart.data, fracpart.data, - arg.mem_size) - + intpart = arg._new_like_me(queue=queue) + fracpart = arg._new_like_me(queue=queue) + _modf(intpart, fracpart, arg, queue=queue) return fracpart, intpart nan = _make_unary_array_func("nan") diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index 0cc344d9a8df01b065569fdfc84352dd29d730ce..f1163a8b2688ab911484221f6fc32aaa9b6987c7 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -1,4 +1,9 @@ import pyopencl as cl +import pyopencl.array as cl_array +from pyopencl.tools import context_dependent_memoize + + + md5_code = """ /* @@ -175,14 +180,14 @@ md5_code = """ import numpy -def rand(context, queue, shape, dtype): - from pyopencl.array import Array - from pyopencl.elementwise import get_elwise_kernel - result = Array(context, shape, dtype, queue=queue) + +@context_dependent_memoize +def get_rand_kernel(context, dtype): + from pyopencl.elementwise import get_elwise_kernel if dtype == numpy.float32: - func = get_elwise_kernel(context, + return get_elwise_kernel(context, "float *dest, unsigned int seed", md5_code + """ #define POW_2_M32 (1/4294967296.0f) @@ -196,7 +201,7 @@ def rand(context, queue, shape, dtype): """, "md5_rng_float") elif dtype == numpy.float64: - func = get_elwise_kernel(context, + return get_elwise_kernel(context, "double *dest, unsigned int seed", md5_code + """ #define POW_2_M32 (1/4294967296.0) @@ -211,7 +216,7 @@ def rand(context, queue, shape, dtype): """, "md5_rng_float") elif dtype in [numpy.int32, numpy.uint32]: - func = get_elwise_kernel(context, + return get_elwise_kernel(context, "unsigned int *dest, unsigned int seed", md5_code + """ dest[i] = a; @@ -226,10 +231,18 @@ def rand(context, queue, shape, dtype): else: raise NotImplementedError - func(queue, result._global_size, result._local_size, - result.data, numpy.random.randint(2**31-1), - result.size) + + +@cl_array.elwise_kernel_runner +def _rand(output, seed): + return get_rand_kernel(output.context, output.dtype) + +def rand(context, queue, shape, dtype): + from pyopencl.array import Array + + result = Array(context, shape, dtype, queue=queue) + _rand(result, numpy.random.randint(2**31-1)) return result diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index 4ec64b73d9e5cff11b784db6553a76fda5886b2f..bcf2fb54449d61b9f718d837b3c803975e7b2d2c 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -167,8 +167,8 @@ def get_take_kernel(context, dtype, idx_dtype, vec_count=1): } args = ([VectorArg(dtype, "dest"+str(i))for i in range(vec_count)] - + [VectorArg(idx_dtype, "idx")] - + [VectorArg(dtype, "src"+str(i))for i in range(vec_count)] ) + + [VectorArg(dtype, "src"+str(i))for i in range(vec_count)] + + [VectorArg(idx_dtype, "idx")]) body = ( ("%(idx_tp)s src_idx = idx[i];\n" % ctx) + "\n".join( @@ -344,7 +344,7 @@ def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_divide_kernel(context, dtype_x, dtype_y, dtype_z): return get_elwise_kernel(context, - "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y, " % { + "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), @@ -392,7 +392,7 @@ def get_arange_kernel(context, dtype): @context_dependent_memoize def get_pow_kernel(context, dtype): return get_elwise_kernel(context, - "%(tp)s *z, %(tp)s value, %(tp)s *y, " % { + "%(tp)s *z, %(tp)s *y, %(tp)s value" % { "tp": dtype_to_ctype(dtype), }, "z[i] = pow(y[i], value)",