diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 36a39dc3d16e6fae90cf5ec5aabb269457a234d3..298771346360221958b7ad8826ccc472705f83fd 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -228,6 +228,18 @@ class GPUArray:
                     strides = _f_contiguous_strides(dtype.itemsize, shape)
                 elif order == "C":
                     strides = _c_contiguous_strides(dtype.itemsize, shape)
+                elif order == "A":
+                    # similar to https://numpy.org/doc/stable/reference/generated/numpy.array.html
+                    if base is not None and base.flags.f_contiguous:
+                        strides = _f_contiguous_strides(dtype.itemsize, shape)
+                    else:
+                        strides = _c_contiguous_strides(dtype.itemsize, shape)
+                elif order == "K":
+                    # refer to https://github.com/numpy/numpy/blob/9b84c1174125cb32a6be1bb6151782f8b2beda55/doc/neps/nep-0010-new-iterator-ufunc.rst
+                    if base is None or base.flags.c_contiguous:
+                        strides = _c_contiguous_strides(dtype.itemsize, shape)
+                    else:
+                        strides = _f_contiguous_strides(dtype.itemsize, shape)
                 else:
                     raise ValueError("invalid order: %s" % order)
             else:
@@ -895,7 +907,9 @@ class GPUArray:
             shape = tuple(shape[0])
 
         same_contiguity = (order == "C" and self.flags.c_contiguous) or (
-            order == "F" and self.flags.f_contiguous
+            order == "F" and self.flags.f_contiguous) or (
+            order == "A" and (self.flags.c_contiguous or self.flags.f_contiguous)) or (
+            order == "K" and (self.flags.c_contiguous or self.flags.f_contiguous)
         )
 
         if shape == self.shape and same_contiguity:
@@ -919,7 +933,7 @@ class GPUArray:
             dtype=self.dtype,
             allocator=self.allocator,
             base=self,
-            gpudata=int(self.gpudata),
+            gpudata=0 if self.gpudata is None else int(self.gpudata),
             order=order,
         )
 
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index 73ec3ade3f99e8830882b6271aadd70945e4b4f3..4d956b752b8929e6f36edcbac81ff4de66d309c4 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -913,12 +913,28 @@ class TestGPUArray:
         assert throws_exception
 
         # with order specified
-        a_gpu = a_gpu.reshape((4, 32), order="C")
-        assert a_gpu.flags.c_contiguous
-        a_gpu = a_gpu.reshape(4, 32, order="F")
-        assert a_gpu.flags.f_contiguous
-        a_gpu = a_gpu.reshape((4, 32), order="F")
-        assert a_gpu.flags.f_contiguous
+        a_gpu_C = a_gpu.reshape((4, 32), order="C")
+        a_C = a.reshape((4, 32), order="C")
+        np.testing.assert_allclose(a_gpu_C.get(), a_C)
+        a_gpu_NC = a_gpu_C.T
+        a_gpu_new_F = a_gpu_NC.reshape(128)
+        a_NC = a_C.T
+        a_new_F = a_NC.reshape(128)
+        np.testing.assert_allclose(a_gpu_new_F.get(), a_new_F)
+        a_gpu_F = a_gpu.reshape((4, 32), order="F")
+        a_F = a.reshape((4, 32), order="F")
+        np.testing.assert_allclose(a_gpu_F.get(), a_F)
+        a_gpu_NF = a_gpu_F.T
+        a_gpu_new_C = a_gpu_NF.reshape(128)
+        a_NF = a_F.T
+        a_new_C = a_NF.reshape(128)
+        np.testing.assert_allclose(a_gpu_new_C.get(), a_new_C)
+        a_gpu_A = a_gpu.reshape((4, 32), order="A")
+        a_A = a.reshape((4, 32), order="A")
+        np.testing.assert_allclose(a_gpu_A.get(), a_A)
+        a_gpu_K = a_gpu.reshape((4, 32), order="K")
+        a_K = a.reshape((4, 32))  # np.reshape rejects order="K"; C-contig 1-D input yields C layout
+        np.testing.assert_allclose(a_gpu_K.get(), a_K)
         # default is C-contiguous
         a_gpu = a_gpu.reshape((4, 32))
         assert a_gpu.flags.c_contiguous