diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index 258bae5cbc64090c5d70abb61dc39cfed7142218..607e9f9c798c030b36aeb6d5dbcd62dad580cfd0 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -666,6 +666,45 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z):
     )
 
 
+@context_dependent_memoize
+def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array):
+    """
+    Returns the kernel for the operation: ``z = x ** y``
+    """
+    if np.float64 in [dtype_x, dtype_y]:
+        func = "pow"
+    else:
+        func = "powf"
+
+    if not is_base_array and is_exp_array:
+        x_ctype = "%(tp_x)s x"
+        y_ctype = "%(tp_y)s *y"
+        func = "%s(x, y[i])" % func
+
+    elif is_base_array and is_exp_array:
+        x_ctype = "%(tp_x)s *x"
+        y_ctype = "%(tp_y)s *y"
+        func = "%s(x[i], y[i])" % func
+
+    elif is_base_array and not is_exp_array:
+        x_ctype = "%(tp_x)s *x"
+        y_ctype = "%(tp_y)s y"
+        func = "%s(x[i], y)" % func
+
+    else:
+        raise AssertionError
+
+    return get_elwise_kernel(
+        ("%(tp_z)s *z, " + x_ctype + ", " + y_ctype)
+        % {
+            "tp_x": dtype_to_ctype(dtype_x),
+            "tp_y": dtype_to_ctype(dtype_y),
+            "tp_z": dtype_to_ctype(dtype_z),
+        },
+        "z[i] = %s" % func,
+        name="pow_method")
+
+
 @context_dependent_memoize
 def get_fmod_kernel():
     return get_elwise_kernel(
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index a0bf84c4d42f93ba2215c895f7e3746a2d9b177a..5e7a560714a1a5769f311dfdded5416e771b53be 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -839,6 +839,27 @@ class GPUArray:
         """
         return self._pow(other, new=False)
 
+    def __rpow__(self, other):
+        common_dtype = _get_common_dtype(self, other)
+        result = self._new_like_me(common_dtype)
+
+        if not np.isscalar(other):
+            # Base is a gpuarray => do not cast.
+            base = other
+        else:
+            base = common_dtype.type(other)
+
+        func = elementwise.get_rpow_kernel(
+            base.dtype, self.dtype, result.dtype,
+            is_base_array=not np.isscalar(other), is_exp_array=not np.isscalar(self))
+        # Evaluates z = x ** y
+        func.prepared_async_call(self._grid, self._block, None,
+                result.gpudata,                               # z
+                base if np.isscalar(base) else base.gpudata,  # x
+                self.gpudata,                                 # y
+                self.mem_size)
+        return result
+
     def reverse(self, stream=None):
         """Return this array in reversed order. The array is treated
         as one-dimensional.
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index dbf4b2f7f2ebee4fbec4a810b2ff3f8437d99458..5234d8a21b066620059622311e9cfc46811a31fe 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -40,6 +40,21 @@ class TestGPUArray:
         a_gpu = a_gpu.get()
         assert (np.abs(a ** 2 - a_gpu) < 1e-3).all()
 
+    @mark_cuda_test
+    def test_rpow_array(self):
+        scalar = np.random.rand()
+        a = abs(np.random.rand(10))
+        a_gpu = gpuarray.to_gpu(a)
+
+        result = (scalar ** a_gpu).get()
+        np.testing.assert_allclose(scalar ** a, result)
+
+        result = (a_gpu ** a_gpu).get()
+        np.testing.assert_allclose(a ** a, result)
+
+        result = (a_gpu ** scalar).get()
+        np.testing.assert_allclose(a ** scalar, result)
+
     @mark_cuda_test
     def test_numpy_integer_shape(self):
         gpuarray.empty(np.int32(17), np.float32)
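
For reference, a minimal usage sketch of the reversed-power path added above (illustrative only, not part of the patch; it assumes a CUDA-capable device and uses pycuda.autoinit to set up the context):

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates the CUDA context
import pycuda.gpuarray as gpuarray

a = np.random.rand(10)      # float64 exponents
a_gpu = gpuarray.to_gpu(a)

# float.__pow__ returns NotImplemented for a GPUArray operand, so
# Python falls back to GPUArray.__rpow__, which launches the kernel
# built by get_rpow_kernel above.
z = (2.0 ** a_gpu).get()
np.testing.assert_allclose(2.0 ** a, z)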