From 21c604a09742ef9bbb005e788a90bccfc630011f Mon Sep 17 00:00:00 2001 From: lhausamm Date: Mon, 20 Nov 2017 15:09:57 +0100 Subject: [PATCH 1/7] Add test for reverse slicing --- test/test_gpuarray.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 103bba3d..c6e164fd 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -572,6 +572,24 @@ class TestGPUArray: assert la.norm(a_gpu_slice.get()-a_slice) == 0 + @mark_cuda_test + def test_reverse_slice(self): + from pycuda.curandom import rand as curand + + l = 20000 + a_gpu = curand((l,)) + a = a_gpu.get() + + from random import randrange + for i in range(200): + start = randrange(l) + end = randrange(start, l) + + a_gpu_slice = a_gpu[end:start:-2] + a_slice = a[start:end] + + assert la.norm(a_gpu_slice.get()-a_slice) == 0 + @mark_cuda_test def test_2d_slice_c(self): from pycuda.curandom import rand as curand -- GitLab From 57ff6429a8af30f66cdb05c1d812a830c0610ee8 Mon Sep 17 00:00:00 2001 From: lhausamm Date: Tue, 21 Nov 2017 10:31:41 +0100 Subject: [PATCH 2/7] Fix test_reverse_slicing --- test/test_gpuarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index c6e164fd..0a33ea76 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -586,7 +586,7 @@ class TestGPUArray: end = randrange(start, l) a_gpu_slice = a_gpu[end:start:-2] - a_slice = a[start:end] + a_slice = a[end:start:-2] assert la.norm(a_gpu_slice.get()-a_slice) == 0 -- GitLab From acd59352e590a2997db8e66585194db6901b1059 Mon Sep 17 00:00:00 2001 From: lhausamm Date: Fri, 24 Nov 2017 15:00:09 +0100 Subject: [PATCH 3/7] Reverse slice: obtaining the right indices, need to reverse correctly --- pycuda/elementwise.py | 2 +- pycuda/gpuarray.py | 32 ++++++++++++++++++++++++-------- test/test_gpuarray.py | 9 ++++++--- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index feab0a6b..bdd3d09b 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -495,7 +495,7 @@ def get_fill_kernel(dtype): @context_dependent_memoize def get_reverse_kernel(dtype): return get_elwise_kernel( - "%(tp)s *y, %(tp)s *z" % { + "%(tp)s *y, %(tp)s *z, int skip" % { "tp": dtype_to_ctype(dtype), }, "z[i] = y[n-1-i]", diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 99b63969..2a3d3e3f 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -674,16 +674,17 @@ class GPUArray(object): as one-dimensional. """ - if not self.flags.forc: - raise RuntimeError("only contiguous arrays may " - "be used as arguments to this operation") + #if not self.flags.forc: + # raise RuntimeError("only contiguous arrays may " + # "be used as arguments to this operation") result = self._new_like_me() func = elementwise.get_reverse_kernel(self.dtype) + skip = self.strides[0] // self.dtype.itemsize func.prepared_async_call(self._grid, self._block, stream, - self.gpudata, result.gpudata, - self.mem_size) + self.gpudata, result.gpudata, + skip, self.mem_size) return result @@ -856,9 +857,19 @@ class GPUArray(object): start, stop, idx_stride = index_entry.indices( self.shape[array_axis]) + if idx_stride < 0: + # with idx_stride expect stop < start + start, stop = stop, start + # now need to take care of the rounding + n_step = abs(stop - start) + n_step /= abs(idx_stride) + # shift required due to [i:j] does not include j + start = int(stop + n_step * idx_stride + 1) + stop += 1 + array_stride = self.strides[array_axis] - new_shape.append((stop-start-1)//idx_stride+1) + new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1) new_strides.append(idx_stride*array_stride) new_offset += array_stride*start @@ -910,7 +921,7 @@ class GPUArray(object): array_axis += 1 - return GPUArray( + tmp = GPUArray( shape=tuple(new_shape), dtype=self.dtype, allocator=self.allocator, @@ -918,6 +929,11 @@ class GPUArray(object): gpudata=int(self.gpudata)+new_offset, strides=tuple(new_strides)) + if new_strides[0] < 0: + tmp = tmp.reverse() + + return tmp + def __setitem__(self, index, value): _memcpy_discontig(self[index], value) @@ -1297,7 +1313,7 @@ def _memcpy_discontig(dst, src, async=False, stream=None): copy.width_in_bytes = src.dtype.itemsize*shape[0] - copy.src_pitch = src_strides[1] + copy.src_pitch = abs(src_strides[1]) copy.dst_pitch = dst_strides[1] copy.height = shape[1] diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 0a33ea76..b1d4c279 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -576,18 +576,21 @@ class TestGPUArray: def test_reverse_slice(self): from pycuda.curandom import rand as curand - l = 20000 + l = 10 a_gpu = curand((l,)) a = a_gpu.get() from random import randrange for i in range(200): - start = randrange(l) - end = randrange(start, l) + start = 2 + end = 5 a_gpu_slice = a_gpu[end:start:-2] a_slice = a[end:start:-2] + print("GPU", a_gpu_slice.get()) + print("CPU", a_slice) + print("Ref", a) assert la.norm(a_gpu_slice.get()-a_slice) == 0 @mark_cuda_test -- GitLab From d55efde970f840000cd150e3d24f4d19b5fb72b3 Mon Sep 17 00:00:00 2001 From: lhausamm Date: Fri, 24 Nov 2017 15:41:14 +0100 Subject: [PATCH 4/7] Reverse slicing works on special cases --- pycuda/elementwise.py | 18 +++++++++++++----- pycuda/gpuarray.py | 2 ++ test/test_gpuarray.py | 15 ++++++++++----- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index bdd3d09b..d9f2388c 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -495,11 +495,19 @@ def get_fill_kernel(dtype): @context_dependent_memoize def get_reverse_kernel(dtype): return get_elwise_kernel( - "%(tp)s *y, %(tp)s *z, int skip" % { - "tp": dtype_to_ctype(dtype), - }, - "z[i] = y[n-1-i]", - "reverse") + "%(tp)s *y, %(tp)s *z, int skip" % { + "tp": dtype_to_ctype(dtype), + }, + """ + skip = abs(skip); + if (skip == 1) { + z[i] = y[n-1-i]; + } + else { + z[i] = y[n-i*skip]; + } + """, + "reverse") @context_dependent_memoize diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 2a3d3e3f..0e5cb445 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -866,6 +866,7 @@ class GPUArray(object): # shift required due to [i:j] does not include j start = int(stop + n_step * idx_stride + 1) stop += 1 + print(start, stop, idx_stride) array_stride = self.strides[array_axis] @@ -929,6 +930,7 @@ class GPUArray(object): gpudata=int(self.gpudata)+new_offset, strides=tuple(new_strides)) + print("Before", tmp) if new_strides[0] < 0: tmp = tmp.reverse() diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index b1d4c279..566d67b8 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -470,7 +470,7 @@ class TestGPUArray: b = a_cpu.get() for i in range(0, 10): - assert a[len(a)-1-i] == b[i] + assert a[len(a)-1-i] == b[i], "%s, %s" % (a[len(a)-1-i], b[i]) @mark_cuda_test def test_sum(self): @@ -582,12 +582,17 @@ class TestGPUArray: from random import randrange for i in range(200): - start = 2 - end = 5 + start = end = 0 + while end-start < 2: + start = randrange(l) + end = randrange(start, l) - a_gpu_slice = a_gpu[end:start:-2] - a_slice = a[end:start:-2] + step = randrange(1, min(end-start, 20)) + a_gpu_slice = a_gpu[end:start:-step] + a_slice = a[end:start:-step] + + print("Slice", start, end, step) print("GPU", a_gpu_slice.get()) print("CPU", a_slice) print("Ref", a) -- GitLab From 0ed8731d2cf352c22e060e90d6cbffc4f8d6e8f2 Mon Sep 17 00:00:00 2001 From: lhausamm Date: Fri, 24 Nov 2017 17:28:35 +0100 Subject: [PATCH 5/7] Reversing slicing works --- pycuda/gpuarray.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 0e5cb445..52c2bc06 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -857,24 +857,24 @@ class GPUArray(object): start, stop, idx_stride = index_entry.indices( self.shape[array_axis]) + # number of element + n = (abs(stop-start)-1) // idx_stride + 1 + if idx_stride < 0: - # with idx_stride expect stop < start - start, stop = stop, start - # now need to take care of the rounding - n_step = abs(stop - start) - n_step /= abs(idx_stride) - # shift required due to [i:j] does not include j - start = int(stop + n_step * idx_stride + 1) - stop += 1 - print(start, stop, idx_stride) + # compute number of element + n = (abs(stop - start) + abs(idx_stride) - 1) // abs(idx_stride) + # compute boundaries + stop = start + 1 + start = stop - (n - 1) * abs(idx_stride) - 1 array_stride = self.strides[array_axis] - new_shape.append((abs(stop-start)-1)//abs(idx_stride)+1) + new_shape.append(n) new_strides.append(idx_stride*array_stride) new_offset += array_stride*start index_axis += 1 + array_axis += 1 elif isinstance(index_entry, (int, np.integer)): @@ -930,7 +930,6 @@ class GPUArray(object): gpudata=int(self.gpudata)+new_offset, strides=tuple(new_strides)) - print("Before", tmp) if new_strides[0] < 0: tmp = tmp.reverse() -- GitLab From d555d53ae3fda0fd359bb67048cb9f64a70c67ab Mon Sep 17 00:00:00 2001 From: lhausamm Date: Fri, 24 Nov 2017 18:04:30 +0100 Subject: [PATCH 6/7] Reverse slicing working now --- pycuda/elementwise.py | 8 ++------ pycuda/gpuarray.py | 5 +---- test/test_gpuarray.py | 4 ---- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index d9f2388c..e16b71d4 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -500,12 +500,8 @@ def get_reverse_kernel(dtype): }, """ skip = abs(skip); - if (skip == 1) { - z[i] = y[n-1-i]; - } - else { - z[i] = y[n-i*skip]; - } + size_t N = (n-1)*skip; + z[i] = y[N-i*skip]; """, "reverse") diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 52c2bc06..1e1c0fc9 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -674,10 +674,6 @@ class GPUArray(object): as one-dimensional. """ - #if not self.flags.forc: - # raise RuntimeError("only contiguous arrays may " - # "be used as arguments to this operation") - result = self._new_like_me() func = elementwise.get_reverse_kernel(self.dtype) @@ -930,6 +926,7 @@ class GPUArray(object): gpudata=int(self.gpudata)+new_offset, strides=tuple(new_strides)) + print("Before", tmp) if new_strides[0] < 0: tmp = tmp.reverse() diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 566d67b8..201069c6 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -592,10 +592,6 @@ class TestGPUArray: a_gpu_slice = a_gpu[end:start:-step] a_slice = a[end:start:-step] - print("Slice", start, end, step) - print("GPU", a_gpu_slice.get()) - print("CPU", a_slice) - print("Ref", a) assert la.norm(a_gpu_slice.get()-a_slice) == 0 @mark_cuda_test -- GitLab From deaae43f8ae6ac8f39472ed8a30e30299b5c2b8f Mon Sep 17 00:00:00 2001 From: lhausamm Date: Fri, 24 Nov 2017 18:06:27 +0100 Subject: [PATCH 7/7] Remove debugging print --- pycuda/gpuarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 1e1c0fc9..9643a0a1 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -926,7 +926,6 @@ class GPUArray(object): gpudata=int(self.gpudata)+new_offset, strides=tuple(new_strides)) - print("Before", tmp) if new_strides[0] < 0: tmp = tmp.reverse() -- GitLab