diff --git a/doc/source/array.rst b/doc/source/array.rst index 34efb1cff9485fe8bce7c9f9e7112f3e5f75c697..3b12f04ad10fd650349e9c8d6412e2a1e7a3a83f 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -189,12 +189,26 @@ The :class:`GPUArray` Array Class .. versionadded: 0.94 - .. method :: conj() + .. method :: conj(out=None) - Return the complex conjugate of *self*, or *self* if it is real. + Return the complex conjugate of *self*, or *self* if it is real. If *out* + is not given, a newly allocated :class:`GPUArray` will be returned. Use + *out=self* to compute the conjugate in place. .. versionadded: 0.94 + .. versionchanged:: 2020.1.1 + + add *out* parameter + + + .. method :: conjugate(out=None) + + alias of :meth:`conj` + + .. versionadded:: 2020.1.1 + + .. method:: bind_to_texref(texref, allow_offset=False) Bind *self* to the :class:`pycuda.driver.TextureReference` *texref*. diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 633edd78d7b2ae07dcc61b53fd3b7a498df0854f..258bae5cbc64090c5d70abb61dc39cfed7142218 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -606,11 +606,12 @@ def get_imag_kernel(dtype, real_dtype): @context_dependent_memoize -def get_conj_kernel(dtype): +def get_conj_kernel(dtype, conj_dtype): return get_elwise_kernel( - "%(tp)s *y, %(tp)s *z" + "%(tp)s *y, %(conj_tp)s *z" % { "tp": dtype_to_ctype(dtype), + "conj_tp": dtype_to_ctype(conj_dtype) }, "z[i] = pycuda::conj(y[i])", "conj", diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index a1a3f3f34848f9e1ca7eb7b9da2b31098cb0cb65..f5908a064108bc142a6a5c21f47b7c33a16015e8 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -1141,7 +1141,7 @@ class GPUArray: else: return zeros_like(self) - def conj(self): + def conj(self, out=None): dtype = self.dtype if issubclass(self.dtype.type, np.complexfloating): if not self.flags.forc: @@ -1154,9 +1154,12 @@ class GPUArray: order = "F" else: order = "C" - result = self._new_like_me(order=order) + if out is None: + 
result = self._new_like_me(order=order) + else: + result = out - func = elementwise.get_conj_kernel(dtype) + func = elementwise.get_conj_kernel(dtype, result.dtype) func.prepared_async_call( self._grid, self._block, @@ -1170,6 +1173,8 @@ class GPUArray: else: return self + conjugate = conj + # }}} # {{{ rich comparisons diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index fc7b67366c509089fd3a26b4e3c183fde1ca9269..d5d092510e3fa742acc9742c6635f7fa91b00dbc 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -732,6 +732,10 @@ class TestGPUArray: assert la.norm(z.get().real - z.real.get()) == 0 assert la.norm(z.get().imag - z.imag.get()) == 0 assert la.norm(z.get().conj() - z.conj().get()) == 0 + # verify conj with out parameter + z_out = z.astype(np.complex64) + assert z_out is z.conj(out=z_out) + assert la.norm(z.get().conj() - z_out.get()) < 1e-7 # verify contiguity is preserved for order in ["C", "F"]: