From 8f46a48d5cc2607ca68583a8b8934f0f7ba70c4f Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 14:34:50 -0400 Subject: [PATCH 1/6] FIX: make GPUArray .imag, .real and .conj() preserve contiguity --- pycuda/gpuarray.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 896b2fca..7d1df7cc 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -213,7 +213,6 @@ class GPUArray(object): assert base is None else: self.gpudata = gpudata - self.base = base self._grid, self._block = splay(self.mem_size) @@ -375,7 +374,7 @@ class GPUArray(object): return out - def _new_like_me(self, dtype=None): + def _new_like_me(self, dtype=None, order="C"): strides = None if dtype is None: dtype = self.dtype @@ -384,7 +383,7 @@ class GPUArray(object): strides = self.strides return self.__class__(self.shape, dtype, - allocator=self.allocator, strides=strides) + allocator=self.allocator, strides=strides, order=order) # operators --------------------------------------------------------------- def mul_add(self, selffac, other, otherfac, add_timer=None, stream=None): @@ -900,8 +899,11 @@ class GPUArray(object): if issubclass(dtype.type, np.complexfloating): from pytools import match_precision real_dtype = match_precision(np.dtype(np.float64), dtype) - - result = self._new_like_me(dtype=real_dtype) + if self.flags.f_contiguous: + order = "F" + else: + order = "C" + result = self._new_like_me(dtype=real_dtype, order=order) func = elementwise.get_real_kernel(dtype, real_dtype) func.prepared_async_call(self._grid, self._block, None, @@ -922,8 +924,11 @@ class GPUArray(object): from pytools import match_precision real_dtype = match_precision(np.dtype(np.float64), dtype) - - result = self._new_like_me(dtype=real_dtype) + if self.flags.f_contiguous: + order = "F" + else: + order = "C" + result = self._new_like_me(dtype=real_dtype, order=order) func = elementwise.get_imag_kernel(dtype, real_dtype) func.prepared_async_call(self._grid, self._block, None, @@ -941,7 +946,11 @@ class GPUArray(object): raise RuntimeError("only contiguous arrays may " "be used as arguments to this operation") - result = self._new_like_me() + if self.flags.f_contiguous: + order = "F" + else: + order = "C" + result = self._new_like_me(order=order) func = elementwise.get_conj_kernel(dtype) func.prepared_async_call(self._grid, self._block, None, -- GitLab From ebe96acae146182a089e85c26f406e63c1e284eb Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 15:02:32 -0400 Subject: [PATCH 2/6] ENH: add order kwarg to GPUArray.reshape --- pycuda/gpuarray.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 7d1df7cc..374a3693 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -679,7 +679,7 @@ class GPUArray(object): return result - def reshape(self, *shape): + def reshape(self, *shape, order="C"): """Gives a new shape to an array without changing its data.""" # TODO: add more error-checking, perhaps @@ -711,7 +711,8 @@ class GPUArray(object): dtype=self.dtype, allocator=self.allocator, base=self, - gpudata=int(self.gpudata)) + gpudata=int(self.gpudata), + order=order) def ravel(self): return self.reshape(self.size) -- GitLab From fa2b5d40f5851bc24180c31d812ed7ff99f20718 Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 18:23:03 -0400 Subject: [PATCH 3/6] fix reshape argument handling for Python 2.x compatibility add missing contiguity check and add reshape order tests --- pycuda/gpuarray.py | 10 ++++++++-- test/test_gpuarray.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 374a3693..d158608c 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -679,9 +679,12 @@ class GPUArray(object): return result - def reshape(self, *shape, order="C"): + def reshape(self, *shape, **kwargs): """Gives a new shape to an array without changing its data.""" + # Python 2.x compatibility: use kwargs instead of named 'order' keyword + order = kwargs.pop("order", "C") + # TODO: add more error-checking, perhaps if not self.flags.forc: raise RuntimeError("only contiguous arrays may " @@ -690,7 +693,10 @@ class GPUArray(object): if isinstance(shape[0], tuple) or isinstance(shape[0], list): shape = tuple(shape[0]) - if shape == self.shape: + same_contiguity = ((order == "C" and self.flags.c_contiguous) or + (order == "F" and self.flags.f_contiguous)) + + if shape == self.shape and same_contiguity: return self if -1 in shape: diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index faae3048..585dae7a 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -779,6 +779,17 @@ class TestGPUArray: throws_exception = True assert throws_exception + # with order specified + a_gpu = a_gpu.reshape((4, 32), order='C') + assert a_gpu.flags.c_contiguous + a_gpu = a_gpu.reshape(4, 32, order='F') + assert a_gpu.flags.f_contiguous + a_gpu = a_gpu.reshape((4, 32), order='F') + assert a_gpu.flags.f_contiguous + # default is C-contiguous + a_gpu = a_gpu.reshape((4, 32)) + assert a_gpu.flags.c_contiguous + @mark_cuda_test def test_view(self): a = np.arange(128).reshape(8, 16).astype(np.float32) -- GitLab From 1e2c425c5314e95c1f2449bbee52f50ce9aa649c Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 19:04:27 -0400 Subject: [PATCH 4/6] support both dtype and order arguments in zeros_like and empty_like as present in numpy order = 'K' default behavior is needed for the .imag and .real properties of GPUArray to properly preserve the array order when the component is zero-valued. These options have been present in the numpy-equivalent functions since version 1.6. --- pycuda/gpuarray.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index d158608c..74ba7e28 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -1005,22 +1005,35 @@ empty = GPUArray def zeros(shape, dtype, allocator=drv.mem_alloc, order="C"): """Returns an array of the given shape and dtype filled with 0's.""" - result = GPUArray(shape, dtype, allocator, order=order) zero = np.zeros((), dtype) result.fill(zero) return result -def empty_like(other_ary): +def empty_like(other_ary, dtype=None, order='K'): + if order == 'K': + if other_ary.flags.f_contiguous: + order = "F" + else: + order = "C" + if dtype is None: + dtype = other_ary.dtype result = GPUArray( - other_ary.shape, other_ary.dtype, other_ary.allocator) + other_ary.shape, dtype, other_ary.allocator, order=order) return result -def zeros_like(other_ary): +def zeros_like(other_ary, dtype=None, order='K'): + if order == 'K': + if other_ary.flags.f_contiguous: + order = "F" + else: + order = "C" + if dtype is None: + dtype = other_ary.dtype result = GPUArray( - other_ary.shape, other_ary.dtype, other_ary.allocator) + other_ary.shape, dtype, other_ary.allocator, order=order) zero = np.zeros((), result.dtype) result.fill(zero) return result -- GitLab From 6947d65ebd1f3ec08c96bcb476389f8ac21d84ae Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 19:11:46 -0400 Subject: [PATCH 5/6] add ordering preservation tests for the GPUArray complex attributes --- test/test_gpuarray.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 585dae7a..39326eed 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -691,6 +691,25 @@ class TestGPUArray: assert la.norm(z.get().imag - z.imag.get()) == 0 assert la.norm(z.get().conj() - z.conj().get()) == 0 + # verify contiguity is preserved + for order in ["C", "F"]: + # test both zero and non-zero value code paths + z_real = gpuarray.zeros(z.shape, dtype=real_dtype, + order=order) + z2 = z.reshape(z.shape, order=order) + for zdata in [z_real, z2]: + if order == "C": + assert zdata.flags.c_contiguous == True + assert zdata.real.flags.c_contiguous == True + assert zdata.imag.flags.c_contiguous == True + assert zdata.conj().flags.c_contiguous == True + elif order == "F": + assert zdata.flags.f_contiguous == True + assert zdata.real.flags.f_contiguous == True + assert zdata.imag.flags.f_contiguous == True + assert zdata.conj().flags.f_contiguous == True + + @mark_cuda_test def test_pass_slice_to_kernel(self): mod = SourceModule(""" -- GitLab From 23cd92c3ee2e9cd184b9fd1e7cefa3b2210e68b1 Mon Sep 17 00:00:00 2001 From: "Gregory R. Lee" Date: Tue, 18 Jul 2017 19:16:42 -0400 Subject: [PATCH 6/6] add ones_like numpy equivalent to gpuarray --- pycuda/gpuarray.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 74ba7e28..eda944d8 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -1039,6 +1039,21 @@ def zeros_like(other_ary, dtype=None, order='K'): return result +def ones_like(other_ary, dtype=None, order='K'): + if order == 'K': + if other_ary.flags.f_contiguous: + order = "F" + else: + order = "C" + if dtype is None: + dtype = other_ary.dtype + result = GPUArray( + other_ary.shape, dtype, other_ary.allocator, order=order) + one = np.ones((), result.dtype) + result.fill(one) + return result + + def arange(*args, **kwargs): """Create an array filled with numbers spaced `step` apart, starting from `start` and ending at `stop`. -- GitLab