diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 3f6fd32dfd5bd9377286e5336d09d68d82bec4da..99b63969ad25bd78c762aa70c9ecb917a22360e4 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -605,14 +605,10 @@ class GPUArray(object): return result - def __pow__(self, other): - """pow function:: - - example: - array = pow(array) - array = pow(array,4) - array = pow(array,array) - + def _pow(self, other, new): + """ + Raise this array to the power other, element-wise. + new=True returns a fresh array (pow); new=False writes into self (ipow). """ if isinstance(other, GPUArray): @@ -622,7 +618,10 @@ class GPUArray(object): assert self.shape == other.shape - result = self._new_like_me(_get_common_dtype(self, other)) + if new: + result = self._new_like_me(_get_common_dtype(self, other)) + else: + result = self func = elementwise.get_pow_array_kernel( self.dtype, other.dtype, result.dtype) @@ -637,7 +636,10 @@ class GPUArray(object): raise RuntimeError("only contiguous arrays may " "be used as arguments to this operation") - result = self._new_like_me() + if new: + result = self._new_like_me() + else: + result = self func = elementwise.get_pow_kernel(self.dtype) func.prepared_async_call(self._grid, self._block, None, other, self.gpudata, result.gpudata, @@ -645,6 +647,28 @@ class GPUArray(object): return result + def __pow__(self, other): + """pow function:: + + example: + array = array ** 2 + array = pow(array,4) + array = pow(array,array) + + """ + return self._pow(other,new=True) + + def __ipow__(self, other): + """ipow function:: + + example: + array **= 4 + array **= array + + """ + return self._pow(other,new=False) + + def reverse(self, stream=None): """Return this array in reversed order. The array is treated as one-dimensional. 
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 0a3218e1e5b849a9df165bd64dad3b95b709bd2f..103bba3d38c31c1106b0214407df46f5f8189663 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -36,6 +36,10 @@ class TestGPUArray: result = (a_gpu**a_gpu).get() assert (np.abs(pow(a, a) - result) < 1e-3).all() + a_gpu **= a_gpu + a_gpu = a_gpu.get() + assert (np.abs(pow(a, a) - a_gpu) < 1e-3).all() + @mark_cuda_test def test_pow_number(self): a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) @@ -44,6 +48,10 @@ class TestGPUArray: result = pow(a_gpu, 2).get() assert (np.abs(a**2 - result) < 1e-3).all() + a_gpu **= 2 + a_gpu = a_gpu.get() + assert (np.abs(a**2 - a_gpu) < 1e-3).all() + @mark_cuda_test def test_numpy_integer_shape(self): gpuarray.empty(np.int32(17), np.float32)