diff --git a/doc/source/array.rst b/doc/source/array.rst index fc05a353563e607830ac7732fc47d964b1e6e8e2..35c8166d6da8ed8fedfcc20cd8cde73aebc6598e 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -90,7 +90,7 @@ The :class:`GPUArray` Array Class of :attr:`size` instead of its current value. The change was made in order to match :mod:`numpy`. - .. method :: reshape(shape) + .. method :: reshape(shape, order="C") Returns an array containing the same data with a new shape. @@ -273,15 +273,34 @@ Constructing :class:`GPUArray` Instances Same as :func:`empty`, but the :class:`GPUArray` is zero-initialized before being returned. -.. function:: empty_like(other_ary) +.. function:: empty_like(other_ary, dtype=None, order="K") Make a new, uninitialized :class:`GPUArray` having the same properties - as *other_ary*. + as *other_ary*. The *dtype* and *order* arguments allow these aspects to + be set independently of their values in *other_ary*. For *order*, "A" + means retain Fortran-ordering if the input is Fortran-contiguous, otherwise + use "C" ordering. The default, *order* = "K", tries to match the strides + of *other_ary* as closely as possible. -.. function:: zeros_like(other_ary) +.. function:: zeros_like(other_ary, dtype=None, order="K") Make a new, zero-initialized :class:`GPUArray` having the same properties - as *other_ary*. + as *other_ary*. The *dtype* and *order* arguments allow these aspects to + be set independently of their values in *other_ary*. For *order*, "A" + means retain Fortran-ordering if the input is Fortran-contiguous, otherwise + use "C" ordering. The default, *order* = "K", tries to match the strides + of *other_ary* as closely as possible. + +.. function:: ones_like(other_ary, dtype=None, order="K") + + Make a new, ones-initialized :class:`GPUArray` having the same properties + as *other_ary*. The *dtype* and *order* arguments allow these aspects to + be set independently of their values in *other_ary*. 
For *order*, "A" + means retain Fortran-ordering if the input is Fortran-contiguous, otherwise + use "C" ordering. The default, *order* = "K", tries to match the strides + of *other_ary* as closely as possible. + + .. versionadded:: 2017.2 .. function:: arange(start, stop, step, dtype=None, stream=None) diff --git a/doc/source/misc.rst b/doc/source/misc.rst index d8e73dc3ca3a38048232531d6e1b9cdec842f5b3..572eeaa9c47ccf8cb182c6a238711f5e272929dd 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -1,6 +1,18 @@ Changes ======= +Version 2017.2 +-------------- + +* :func:`zeros_like` and :func:`empty_like` now have *dtype* and *order* + arguments as in numpy. Previously these routines always returned a + C-order array. The new default behavior follows the numpy default, which is + to match the order and strides of the input as closely as possible. +* A :func:`ones_like` gpuarray function was added. +* methods :attr:`GPUArray.imag`, :attr:`GPUArray.real`, :meth:`GPUArray.conj` + now all return Fortran-ordered arrays when the :class:`GPUArray` is + Fortran-ordered. + Version 2016.2 -------------- .. note:: diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index eda944d86d2f73200be48646b1c3f498623d39a8..3f6fd32dfd5bd9377286e5336d09d68d82bec4da 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -202,6 +202,7 @@ class GPUArray(object): self.strides = strides self.mem_size = self.size = s self.nbytes = self.dtype.itemsize * self.size + self.itemsize = self.dtype.itemsize self.allocator = allocator if gpudata is None: @@ -1011,44 +1012,57 @@ def zeros(shape, dtype, allocator=drv.mem_alloc, order="C"): return result -def empty_like(other_ary, dtype=None, order='K'): - if order == 'K': - if other_ary.flags.f_contiguous: +def _array_like_helper(other_ary, dtype, order): + """Set order, strides, dtype as in numpy's zeros_like. 
""" + strides = None + if order == "A": + if other_ary.flags.f_contiguous and not other_ary.flags.c_contiguous: order = "F" else: order = "C" + elif order == "K": + if other_ary.flags.c_contiguous or (other_ary.ndim <= 1): + order = "C" + elif other_ary.flags.f_contiguous: + order = "F" + else: + # array_like routines only return positive strides + strides = [np.abs(s) for s in other_ary.strides] + if dtype is not None and dtype != other_ary.dtype: + # scale strides by itemsize when dtype is not the same + itemsize = other_ary.nbytes // other_ary.size + itemsize_ratio = np.dtype(dtype).itemsize / itemsize + strides = [int(s*itemsize_ratio) for s in strides] + elif order not in ["C", "F"]: + raise ValueError("Unsupported order: %r" % order) if dtype is None: dtype = other_ary.dtype + return dtype, order, strides + + +def empty_like(other_ary, dtype=None, order="K"): + dtype, order, strides = _array_like_helper(other_ary, dtype, order) result = GPUArray( - other_ary.shape, dtype, other_ary.allocator, order=order) + other_ary.shape, dtype, other_ary.allocator, order=order, + strides=strides) return result -def zeros_like(other_ary, dtype=None, order='K'): - if order == 'K': - if other_ary.flags.f_contiguous: - order = "F" - else: - order = "C" - if dtype is None: - dtype = other_ary.dtype +def zeros_like(other_ary, dtype=None, order="K"): + dtype, order, strides = _array_like_helper(other_ary, dtype, order) result = GPUArray( - other_ary.shape, dtype, other_ary.allocator, order=order) + other_ary.shape, dtype, other_ary.allocator, order=order, + strides=strides) zero = np.zeros((), result.dtype) result.fill(zero) return result -def ones_like(other_ary, dtype=None, order='K'): - if order == 'K': - if other_ary.flags.f_contiguous: - order = "F" - else: - order = "C" - if dtype is None: - dtype = other_ary.dtype +def ones_like(other_ary, dtype=None, order="K"): + dtype, order, strides = _array_like_helper(other_ary, dtype, order) result = GPUArray( - other_ary.shape, 
dtype, other_ary.allocator, order=order) + other_ary.shape, dtype, other_ary.allocator, order=order, + strides=strides) one = np.ones((), result.dtype) result.fill(one) return result diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 39326eedb85a9e31eaa748519f59e1c5a3468630..0a3218e1e5b849a9df165bd64dad3b95b709bd2f 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -1085,6 +1085,56 @@ class TestGPUArray: assert np.allclose(a_gpu.get(), a) assert np.allclose(a_gpu[1:3,1:3,1:3].get(), a[1:3,1:3,1:3]) + @mark_cuda_test + def test_zeros_like_etc(self): + shape = (16, 16) + a = np.random.randn(*shape).astype(np.float32) + z = gpuarray.to_gpu(a) + zf = gpuarray.to_gpu(np.asfortranarray(a)) + a_noncontig = np.arange(3*4*5).reshape(3, 4, 5).swapaxes(1, 2) + z_noncontig = gpuarray.to_gpu(a_noncontig) + for func in [gpuarray.empty_like, + gpuarray.zeros_like, + gpuarray.ones_like]: + for arr in [z, zf, z_noncontig]: + + contig = arr.flags.c_contiguous or arr.flags.f_contiguous + + # Output matches order of input. 
+ # Non-contiguous becomes C-contiguous + new_z = func(arr, order="A") + if contig: + assert new_z.flags.c_contiguous == arr.flags.c_contiguous + assert new_z.flags.f_contiguous == arr.flags.f_contiguous + else: + assert new_z.flags.c_contiguous is True + assert new_z.flags.f_contiguous is False + assert new_z.dtype == arr.dtype + assert new_z.shape == arr.shape + + # Force C-ordered output + new_z = func(arr, order="C") + assert new_z.flags.c_contiguous is True + assert new_z.flags.f_contiguous is False + assert new_z.dtype == arr.dtype + assert new_z.shape == arr.shape + + # Force Fortran-ordered output + new_z = func(arr, order="F") + assert new_z.flags.c_contiguous is False + assert new_z.flags.f_contiguous is True + assert new_z.dtype == arr.dtype + assert new_z.shape == arr.shape + + # Change the dtype, but otherwise match order & strides + # order = "K" so non-contiguous array remains non-contiguous + new_z = func(arr, dtype=np.complex64, order="K") + assert new_z.flags.c_contiguous == arr.flags.c_contiguous + assert new_z.flags.f_contiguous == arr.flags.f_contiguous + assert new_z.dtype == np.complex64 + assert new_z.shape == arr.shape + + if __name__ == "__main__": # make sure that import failures get reported, instead of skipping the tests. import pycuda.autoinit # noqa