From dc3e4f77e330760e6e670726f29056cfdcbdfd36 Mon Sep 17 00:00:00 2001 From: mit kotak Date: Tue, 3 Aug 2021 22:09:02 -0500 Subject: [PATCH 01/28] Implemented get_rpow_kernel --- pycuda/elementwise.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 258bae5c..7b705713 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -665,6 +665,32 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): "pow_method", ) +@context_dependent_memoize +def get_rpow_kernel(dtype_x, dtype_y, dtype_z , is_base_array, is_exp_array): + + if is_base_array: + x = "x[i]" + x_ctype = "%(tp_x)s *x" + else: + x = "x" + x_ctype = "%(tp_x)s x" + + if is_exp_array: + y = "y[i]" + y_ctype = "%(tp_y)s *y" + else: + y = "y" + y_ctype = "%(tp_y)s y" + + + return get_elwise_kernel( + ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) % { + "tp_x": dtype_to_ctype(dtype_x), + "tp_y": dtype_to_ctype(dtype_y), + "tp_z": dtype_to_ctype(dtype_z), + }, + "z[i] = %s" % result, + name="pow_method") @context_dependent_memoize def get_fmod_kernel(): -- GitLab From caafe5d32f3b9d9658ee31d1c890f888e1fea0ad Mon Sep 17 00:00:00 2001 From: mit kotak Date: Tue, 3 Aug 2021 22:09:34 -0500 Subject: [PATCH 02/28] Implemented rpow_scalar and __rpow__ --- pycuda/gpuarray.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index a0bf84c4..b6aa3f8d 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -515,6 +515,12 @@ class GPUArray: return out + def _rpow_scalar(result, base, exponent): + base = np.array(base) + return elementwise.get_rpow_kernel( + base.dtype, exponent.dtype, result.dtype, + is_base_array=False, is_exp_array=True) + def _new_like_me(self, dtype=None, order="C"): strides = None if dtype is None: @@ -839,6 +845,14 @@ class GPUArray: """ return self._pow(other, new=False) + def __rpow__(self, other): + # other must be a scalar + common_dtype = 
_get_common_dtype(self, other) + result = self._new_like_me(common_dtype) + result.add_event( + self._rpow_scalar(result, common_dtype.type(other), self)) + return result + def reverse(self, stream=None): """Return this array in reversed order. The array is treated as one-dimensional. -- GitLab From d651b4f9c5fe4c2144d765c254b2bd7c673b2ffc Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Wed, 4 Aug 2021 22:04:31 +0000 Subject: [PATCH 03/28] Removed blank spaces --- pycuda/elementwise.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 7b705713..97093cde 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -665,26 +665,23 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): "pow_method", ) + @context_dependent_memoize -def get_rpow_kernel(dtype_x, dtype_y, dtype_z , is_base_array, is_exp_array): +def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): if is_base_array: - x = "x[i]" x_ctype = "%(tp_x)s *x" else: - x = "x" x_ctype = "%(tp_x)s x" if is_exp_array: - y = "y[i]" y_ctype = "%(tp_y)s *y" else: - y = "y" y_ctype = "%(tp_y)s y" return get_elwise_kernel( - ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) % { + ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) %{ "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), -- GitLab From f75ac04085a6a7237226a593f40db5806287bfae Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 28 Aug 2021 01:42:15 +0000 Subject: [PATCH 04/28] Adjusted spaces in elementwise.py + Added self to gpuarray.py --- pycuda/elementwise.py | 15 ++++++++------- pycuda/gpuarray.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 97093cde..28269579 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -681,13 +681,14 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): return get_elwise_kernel( 
- ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) %{ - "tp_x": dtype_to_ctype(dtype_x), - "tp_y": dtype_to_ctype(dtype_y), - "tp_z": dtype_to_ctype(dtype_z), - }, - "z[i] = %s" % result, - name="pow_method") + ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) + %{ + "tp_x": dtype_to_ctype(dtype_x), + "tp_y": dtype_to_ctype(dtype_y), + "tp_z": dtype_to_ctype(dtype_z), + }, + "z[i] = %s" % result, + name="pow_method") @context_dependent_memoize def get_fmod_kernel(): diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index b6aa3f8d..a85367ff 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -515,7 +515,7 @@ class GPUArray: return out - def _rpow_scalar(result, base, exponent): + def _rpow_scalar(self, result, base, exponent): base = np.array(base) return elementwise.get_rpow_kernel( base.dtype, exponent.dtype, result.dtype, -- GitLab From b4a8989cbd3f7ace09e4960640ce82a3625923e8 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 28 Aug 2021 01:48:06 +0000 Subject: [PATCH 05/28] replaced result with func --- pycuda/elementwise.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 28269579..74233f8e 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -681,15 +681,16 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): return get_elwise_kernel( - ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) + ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) %{ "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = %s" % result, + "z[i] = %s" % func, name="pow_method") + @context_dependent_memoize def get_fmod_kernel(): return get_elwise_kernel( -- GitLab From 9fb83772d1badc397d5c1fe024fc1c55a35443d1 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Thu, 16 Sep 2021 01:43:56 +0000 Subject: [PATCH 06/28] Defined func in elementwise.get_rpow_kernel --- pycuda/elementwise.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/pycuda/elementwise.py b/pycuda/elementwise.py index 74233f8e..c7cfecf2 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -671,13 +671,17 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): if is_base_array: x_ctype = "%(tp_x)s *x" + func = pow(x[i],y) else: x_ctype = "%(tp_x)s x" + func = pow(x,y) if is_exp_array: y_ctype = "%(tp_y)s *y" + func = pow(x,y[i]) else: y_ctype = "%(tp_y)s y" + func = pow(x,y) return get_elwise_kernel( -- GitLab From c84e42781f1b5ac0120ac9c07707a0a24917c15f Mon Sep 17 00:00:00 2001 From: mit kotak Date: Wed, 29 Sep 2021 20:47:24 -0500 Subject: [PATCH 07/28] Fixed typos --- pycuda/elementwise.py | 12 ++++++------ pycuda/gpuarray.py | 7 +------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index c7cfecf2..7e4aef78 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -669,19 +669,19 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): - if is_base_array: + if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" - func = pow(x[i],y) + func = "pow(x[i],y)" else: x_ctype = "%(tp_x)s x" - func = pow(x,y) + func = "pow(x,y)" - if is_exp_array: + if is_exp_array and not is_exp_array: y_ctype = "%(tp_y)s *y" - func = pow(x,y[i]) + func = "pow(x,y[i])" else: y_ctype = "%(tp_y)s y" - func = pow(x,y) + func = "pow(x,y)" return get_elwise_kernel( diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index a85367ff..905b496a 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -515,12 +515,6 @@ class GPUArray: return out - def _rpow_scalar(self, result, base, exponent): - base = np.array(base) - return elementwise.get_rpow_kernel( - base.dtype, exponent.dtype, result.dtype, - is_base_array=False, is_exp_array=True) - def _new_like_me(self, dtype=None, order="C"): strides = None if dtype is None: @@ -847,6 +841,7 @@ 
class GPUArray: def __rpow__(self, other): # other must be a scalar + assert np.isscalar(other) common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) result.add_event( -- GitLab From 27864e0face645b8c8465af97ba3c7a0504069ba Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Thu, 30 Sep 2021 01:53:16 +0000 Subject: [PATCH 08/28] Removed rpow_scalar + Fixed typos --- pycuda/elementwise.py | 12 ++++++------ pycuda/gpuarray.py | 7 +------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index c7cfecf2..9781a1ed 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -669,19 +669,19 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): - if is_base_array: + if is_base_array and not_exp_array: x_ctype = "%(tp_x)s *x" - func = pow(x[i],y) + func = "pow(x[i],y)" else: x_ctype = "%(tp_x)s x" - func = pow(x,y) + func = "pow(x,y)" - if is_exp_array: + if is_exp_array and not_exp_array: y_ctype = "%(tp_y)s *y" - func = pow(x,y[i]) + func = "pow(x,y[i])" else: y_ctype = "%(tp_y)s y" - func = pow(x,y) + func = "pow(x,y)" return get_elwise_kernel( diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index a85367ff..905b496a 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -515,12 +515,6 @@ class GPUArray: return out - def _rpow_scalar(self, result, base, exponent): - base = np.array(base) - return elementwise.get_rpow_kernel( - base.dtype, exponent.dtype, result.dtype, - is_base_array=False, is_exp_array=True) - def _new_like_me(self, dtype=None, order="C"): strides = None if dtype is None: @@ -847,6 +841,7 @@ class GPUArray: def __rpow__(self, other): # other must be a scalar + assert np.isscalar(other) common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) result.add_event( -- GitLab From 6829adbe18b8429df3a6ff44b989a447d0c63d89 Mon 
Sep 17 00:00:00 2001 From: mit kotak Date: Fri, 8 Oct 2021 20:09:00 -0500 Subject: [PATCH 09/28] Removed _get_scalar_func_loopy_program --- pycuda/elementwise.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 7e4aef78..b2e50ab6 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -672,17 +672,15 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" func = "pow(x[i],y)" - else: + elif not is_base_array and is_exp_array: x_ctype = "%(tp_x)s x" - func = "pow(x,y)" + func = "pow(x,y[i])" - if is_exp_array and not is_exp_array: + elif is_exp_array and is_exp_array: y_ctype = "%(tp_y)s *y" - func = "pow(x,y[i])" - else: - y_ctype = "%(tp_y)s y" func = "pow(x,y)" - + else: + raise Assertion return get_elwise_kernel( ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) -- GitLab From ab3101592e637e8783a1265127183364c3943db6 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 04:53:27 +0000 Subject: [PATCH 10/28] Corrected if condition logic in rpow_scalar --- pycuda/elementwise.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 9781a1ed..65a57d5d 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -669,20 +669,19 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): - if is_base_array and not_exp_array: + #XOR check that raises an Assertion when both are scalar + raise AssertionError if not (is_exp_array or is_base_array) + if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" func = "pow(x[i],y)" - else: - x_ctype = "%(tp_x)s x" - func = "pow(x,y)" - if is_exp_array and not_exp_array: + if not is_base_array and is_exp_array: y_ctype = "%(tp_y)s *y" func = "pow(x,y[i])" - else: 
- y_ctype = "%(tp_y)s y" - func = "pow(x,y)" + if is_base_array and exp_array: + x_ctype = "%(tp_x)s x" + func = "pow(x,y)" return get_elwise_kernel( ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) -- GitLab From 34af6ea30dd243d0f8353971ef4ca2a6d51b8bea Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 05:07:15 +0000 Subject: [PATCH 11/28] test for rpow_array --- test/test_gpuarray.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index dbf4b2f7..f9411d17 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -39,6 +39,31 @@ class TestGPUArray: a_gpu **= 2 a_gpu = a_gpu.get() assert (np.abs(a ** 2 - a_gpu) < 1e-3).all() + @mark_cuda_test + + def test_rpow_array(self): + scalar = 2 + a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) + a_gpu = gpuarray.to_gpu(a) + + result = rpow(a_gpu, scalar).get() + assert (np.abs(a ** scalar - result) < 1e-3).all() + + a_gpu **= scalar + a_gpu = a_gpu.get() + assert (np.abs(a ** scalar - a_gpu) < 1e-3).all() + + result = rpow(scalar, a_gpu).get() + assert (np.abs(scalar ** a - result) < 1e-3).all() + + a_gpu = a_gpu.get() + assert (np.abs(scalar ** a - scalar ** a_gpu) < 1e-3).all() + + result = rpow(a_gpu, a_gpu).get() + assert (np.abs(a ** a - result) < 1e-3).all() + + a_gpu = a_gpu.get() + assert (np.abs(a ** a - a_gpu ** a_gpu) < 1e-3).all() @mark_cuda_test def test_numpy_integer_shape(self): -- GitLab From bf005c183e0eac0924ccbda6a48fadc92c7bfaf4 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 05:13:24 +0000 Subject: [PATCH 12/28] Corrected assert error typo --- pycuda/elementwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 65a57d5d..cefa0c7d 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -670,7 +670,7 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): def get_rpow_kernel(dtype_x, dtype_y, dtype_z, 
is_base_array, is_exp_array): #XOR check that raises an Assertion when both are scalar - raise AssertionError if not (is_exp_array or is_base_array) + assert if not (is_exp_array or is_base_array) if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" func = "pow(x[i],y)" -- GitLab From 9b6e730334c5b00656764f85ddcb1b90234bde31 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 05:22:39 +0000 Subject: [PATCH 13/28] Removed awkward mark_cuda space --- test/test_gpuarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index f9411d17..b119e66e 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -39,8 +39,8 @@ class TestGPUArray: a_gpu **= 2 a_gpu = a_gpu.get() assert (np.abs(a ** 2 - a_gpu) < 1e-3).all() - @mark_cuda_test + @mark_cuda_test def test_rpow_array(self): scalar = 2 a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) -- GitLab From 15536d63e0547f9dae7276229ddbc432df930a98 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 05:32:33 +0000 Subject: [PATCH 14/28] Changed format for Assertion Error --- pycuda/elementwise.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index cefa0c7d..d650597b 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -670,7 +670,9 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): #XOR check that raises an Assertion when both are scalar - assert if not (is_exp_array or is_base_array) + if not (is_exp_array or is_base_array): + raise AssertionError + if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" func = "pow(x[i],y)" -- GitLab From 7b9a5a33fbc3aa020b49828714bb92d847ba2126 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 22 Oct 2021 06:30:34 +0000 Subject: [PATCH 15/28] rpow_kernel separate conditions for all cases ---
pycuda/gpuarray.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 905b496a..5b9e0c34 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -840,14 +840,42 @@ class GPUArray: return self._pow(other, new=False) def __rpow__(self, other): - # other must be a scalar - assert np.isscalar(other) common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) + + if np.isscalar(other): + base = np.array(common_dtype.type(other)) + result.add_event( + elementwise.get_rpow_kernel( + base.dtype, self.dtype, result.dtype, + is_base_array=False, is_exp_array=True) + ) + return result + + if np.isscalar(self): + exponent = np.array(common_dtype.type(self)) + result.add_event( + elementwise.get_rpow_kernel( + base.dtype, self.dtype, result.dtype, + is_base_array=True, is_exp_array=False) + ) + return result + result.add_event( - self._rpow_scalar(result, common_dtype.type(other), self)) + elementwise.get_rpow_kernel( + base.dtype, self.dtype, result.dtype, + is_base_array=True, is_exp_array=True) + ) return result + + def _rpow_scalar(self, result, base, exponent): + base = np.array(base) + return elementwise.get_rpow_kernel( + base.dtype, exponent.dtype, result.dtype, + is_base_array=False, is_exp_array=True) + + def reverse(self, stream=None): """Return this array in reversed order. The array is treated as one-dimensional. 
-- GitLab From a8b99bf98c7e9bbe6e15657d96d56f40b1068a81 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Fri, 29 Oct 2021 23:38:36 +0000 Subject: [PATCH 16/28] Resolved typo + optimization issues in pycuda/elementwise.py --- pycuda/elementwise.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index d650597b..bec7d688 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -669,21 +669,20 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): - #XOR check that raises an Assertion when both are scalar - if not (is_exp_array or is_base_array): - raise AssertionError - - if is_base_array and not is_exp_array: - x_ctype = "%(tp_x)s *x" - func = "pow(x[i],y)" - if not is_base_array and is_exp_array: y_ctype = "%(tp_y)s *y" func = "pow(x,y[i])" - if is_base_array and exp_array: + elif is_base_array and exp_array: x_ctype = "%(tp_x)s x" - func = "pow(x,y)" + func = "pow(x[i],y[i])" + + elif is_base_array and not is_exp_array: + x_ctype = "%(tp_x)s *x" + func = "pow(x[i],y)" + + else: + raise AssertionError return get_elwise_kernel( ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) -- GitLab From e3efef690e9965060b2f7e3aae79d5029e024478 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 30 Oct 2021 00:33:51 +0000 Subject: [PATCH 17/28] Removed if conditions in __rpow__ --- pycuda/gpuarray.py | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 5b9e0c34..ce93fc46 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -842,40 +842,14 @@ class GPUArray: def __rpow__(self, other): common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) - - if np.isscalar(other): - base = np.array(common_dtype.type(other)) - result.add_event( - elementwise.get_rpow_kernel( -
base.dtype, self.dtype, result.dtype, - is_base_array=False, is_exp_array=True) - ) - return result - - if np.isscalar(self): - exponent = np.array(common_dtype.type(self)) - result.add_event( - elementwise.get_rpow_kernel( - base.dtype, self.dtype, result.dtype, - is_base_array=True, is_exp_array=False) - ) - return result - + base = np.array(common_dtype.type(other)) result.add_event( elementwise.get_rpow_kernel( base.dtype, self.dtype, result.dtype, - is_base_array=True, is_exp_array=True) + is_base_array= not np.isscalar(self), is_exp_array= not np.isscalar(other)) ) return result - - def _rpow_scalar(self, result, base, exponent): - base = np.array(base) - return elementwise.get_rpow_kernel( - base.dtype, exponent.dtype, result.dtype, - is_base_array=False, is_exp_array=True) - - def reverse(self, stream=None): """Return this array in reversed order. The array is treated as one-dimensional. -- GitLab From d1c545b65f19972e4f7703bbb9641b84932f623d Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 30 Oct 2021 00:49:12 +0000 Subject: [PATCH 18/28] Fixed test_pow_array to call __rpow__ --- test/test_gpuarray.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index b119e66e..12cc0743 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -46,24 +46,14 @@ class TestGPUArray: a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) a_gpu = gpuarray.to_gpu(a) - result = rpow(a_gpu, scalar).get() - assert (np.abs(a ** scalar - result) < 1e-3).all() - - a_gpu **= scalar - a_gpu = a_gpu.get() - assert (np.abs(a ** scalar - a_gpu) < 1e-3).all() - - result = rpow(scalar, a_gpu).get() + result = (scalar ** a_gpu).get() assert (np.abs(scalar ** a - result) < 1e-3).all() - a_gpu = a_gpu.get() - assert (np.abs(scalar ** a - scalar ** a_gpu) < 1e-3).all() - - result = rpow(a_gpu, a_gpu).get() + result = (a_gpu ** a_gpu).get() assert (np.abs(a ** a - result) < 1e-3).all() - 
a_gpu = a_gpu.get() - assert (np.abs(a ** a - a_gpu ** a_gpu) < 1e-3).all() + result = (a_gpu ** scalar).get() + assert (np.abs(a ** scalar - result) < 1e-3).all() @mark_cuda_test def test_numpy_integer_shape(self): -- GitLab From 6ee4f8197aaefa152cb2f380c2132d4a64deaed8 Mon Sep 17 00:00:00 2001 From: mit kotak Date: Fri, 29 Oct 2021 20:19:46 -0500 Subject: [PATCH 19/28] Fixed Flake8 issues --- pycuda/elementwise.py | 13 ++++++------- pycuda/gpuarray.py | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index b2e50ab6..caad3560 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -671,20 +671,19 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): if is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" - func = "pow(x[i],y)" + func = "pow(x[i],y)" elif not is_base_array and is_exp_array: x_ctype = "%(tp_x)s x" - func = "pow(x,y[i])" - - elif is_exp_array and is_exp_array: + func = "pow(x,y[i])" + elif is_base_array and is_exp_array: y_ctype = "%(tp_y)s *y" - func = "pow(x,y)" + func = "pow(x,y)" else: - raise Assertion + raise AssertionError return get_elwise_kernel( ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) - %{ + % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 905b496a..a3ff90b4 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -845,7 +845,8 @@ class GPUArray: common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) result.add_event( - self._rpow_scalar(result, common_dtype.type(other), self)) + self._rpow_scalar(result, common_dtype.type(other), self) + ) return result def reverse(self, stream=None): -- GitLab From 8968000caf313256298992eda413e10680704833 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 29 Oct 2021 21:39:56 -0500 Subject: [PATCH 20/28] collective hacking during meeting --- 
pycuda/elementwise.py | 6 ++++++ pycuda/gpuarray.py | 15 +++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 764182c0..2357b132 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -668,17 +668,23 @@ def get_pow_array_kernel(dtype_x, dtype_y, dtype_z): @context_dependent_memoize def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): + """ + Returns the kernel for the operation: ``z = x ** y`` + """ if not is_base_array and is_exp_array: + x_ctype = "%(tp_x)s x" y_ctype = "%(tp_y)s *y" func = "pow(x,y[i])" elif is_base_array and is_exp_array: x_ctype = "%(tp_x)s x" + y_ctype = "%(tp_y)s *y" func = "pow(x[i],y[i])" elif is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" + y_ctype = "%(tp_y)s y" func = "pow(x[i],y)" else: diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index c17eadb7..94fdc4f3 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -842,12 +842,15 @@ class GPUArray: def __rpow__(self, other): common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) - base = np.array(common_dtype.type(other)) - result.add_event( - elementwise.get_rpow_kernel( - base.dtype, self.dtype, result.dtype, - is_base_array=not np.isscalar(self), is_exp_array=not np.isscalar(other)) - ) + # FIXME: We are assuming that 'base' is always scalar. Get rid of that + # assumptions. 
+ base = common_dtype.type(other) + func = elementwise.get_rpow_kernel( + base.dtype, self.dtype, result.dtype, + is_base_array=not np.isscalar(other), is_exp_array=True) + func.prepared_async_call(self._grid, self._block, None, + result.gpudata, base, self.gpudata, + self.mem_size) return result def reverse(self, stream=None): -- GitLab From 9f4b97ba889fbb25d0b113b766401e0398dfe5e0 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 6 Nov 2021 12:30:25 +0000 Subject: [PATCH 21/28] Removed basearray is scalar assumption --- pycuda/gpuarray.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index 94fdc4f3..dcc63c7b 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -842,12 +842,13 @@ class GPUArray: def __rpow__(self, other): common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) - # FIXME: We are assuming that 'base' is always scalar. Get rid of that - # assumptions. - base = common_dtype.type(other) + if np.isscalar(other): + base = common_dtype.type(other) + else: + base = common_dtype.type(self) func = elementwise.get_rpow_kernel( base.dtype, self.dtype, result.dtype, - is_base_array=not np.isscalar(other), is_exp_array=True) + is_base_array=not np.isscalar(other), is_exp_array=not np.isscalar(self)) func.prepared_async_call(self._grid, self._block, None, result.gpudata, base, self.gpudata, self.mem_size) -- GitLab From cbb8007596533c7336ed23f01be44278514b1460 Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Sat, 6 Nov 2021 12:41:20 +0000 Subject: [PATCH 22/28] Tried fixing a ** a issue --- pycuda/gpuarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index dcc63c7b..dbcea0bc 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -842,10 +842,10 @@ class GPUArray: def __rpow__(self, other): common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) - if 
np.isscalar(other): - base = common_dtype.type(other) - else: + if not np.isscalar(other): base = common_dtype.type(self) + else: + base = common_dtype.type(other) func = elementwise.get_rpow_kernel( base.dtype, self.dtype, result.dtype, is_base_array=not np.isscalar(other), is_exp_array=not np.isscalar(self)) -- GitLab From 5ccd0dd44e891cfcd9d205d7c09ab1332d51d026 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 6 Nov 2021 09:36:06 -0500 Subject: [PATCH 23/28] some more collective hacking during the meeting --- pycuda/elementwise.py | 3 ++- pycuda/gpuarray.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 2357b132..18672826 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -671,6 +671,7 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): """ Returns the kernel for the operation: ``z = x ** y`` """ + # FIXME: This needs to be more precise in its invocation of pow/powf. if not is_base_array and is_exp_array: x_ctype = "%(tp_x)s x" @@ -678,7 +679,7 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): func = "pow(x,y[i])" elif is_base_array and is_exp_array: - x_ctype = "%(tp_x)s x" + x_ctype = "%(tp_x)s *x" y_ctype = "%(tp_y)s *y" func = "pow(x[i],y[i])" diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py index dbcea0bc..5e7a5607 100644 --- a/pycuda/gpuarray.py +++ b/pycuda/gpuarray.py @@ -842,15 +842,21 @@ class GPUArray: def __rpow__(self, other): common_dtype = _get_common_dtype(self, other) result = self._new_like_me(common_dtype) + if not np.isscalar(other): - base = common_dtype.type(self) + # Base is a gpuarray => do not cast. 
+ base = other else: base = common_dtype.type(other) + func = elementwise.get_rpow_kernel( base.dtype, self.dtype, result.dtype, is_base_array=not np.isscalar(other), is_exp_array=not np.isscalar(self)) + # Evaluates z = x ** y func.prepared_async_call(self._grid, self._block, None, - result.gpudata, base, self.gpudata, + result.gpudata, # z + base if np.isscalar(base) else base.gpudata, # x + self.gpudata, # y self.mem_size) return result -- GitLab From dae3cb1f56a237cb24bdfa750ca5bec7e21a5922 Mon Sep 17 00:00:00 2001 From: mit kotak Date: Fri, 12 Nov 2021 18:42:49 -0600 Subject: [PATCH 24/28] Increased precision using pow/powf --- pycuda/elementwise.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py index 18672826..607e9f9c 100644 --- a/pycuda/elementwise.py +++ b/pycuda/elementwise.py @@ -671,22 +671,25 @@ def get_rpow_kernel(dtype_x, dtype_y, dtype_z, is_base_array, is_exp_array): """ Returns the kernel for the operation: ``z = x ** y`` """ - # FIXME: This needs to be more precise in its invocation of pow/powf. 
+ if np.float64 in [dtype_x, dtype_y]: + func = "pow" + else: + func = "powf" if not is_base_array and is_exp_array: x_ctype = "%(tp_x)s x" y_ctype = "%(tp_y)s *y" - func = "pow(x,y[i])" + func = "%s(x,y[i])" % func elif is_base_array and is_exp_array: x_ctype = "%(tp_x)s *x" y_ctype = "%(tp_y)s *y" - func = "pow(x[i],y[i])" + func = "%s(x[i],y[i])" % func elif is_base_array and not is_exp_array: x_ctype = "%(tp_x)s *x" y_ctype = "%(tp_y)s y" - func = "pow(x[i],y)" + func = "%s(x[i],y)" % func else: raise AssertionError -- GitLab From 932b4bc0cb1d0e20b9832bfead1a1a060d912f95 Mon Sep 17 00:00:00 2001 From: mit kotak Date: Fri, 12 Nov 2021 19:43:25 -0600 Subject: [PATCH 25/28] Modified test_rpow float32 -> float64 --- test/test_gpuarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 12cc0743..12ed9f83 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -43,7 +43,7 @@ class TestGPUArray: @mark_cuda_test def test_rpow_array(self): scalar = 2 - a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) + a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float64) a_gpu = gpuarray.to_gpu(a) result = (scalar ** a_gpu).get() -- GitLab From c52bddfaa0957cb263d3bfe3866ab4b760d4a131 Mon Sep 17 00:00:00 2001 From: mit kotak Date: Mon, 22 Nov 2021 19:48:31 -0600 Subject: [PATCH 26/28] Added assert np.testing.assert_allclose() --- test/test_gpuarray.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 12ed9f83..7ecd01f6 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -42,18 +42,18 @@ class TestGPUArray: @mark_cuda_test def test_rpow_array(self): - scalar = 2 - a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float64) + scalar = np.random.rand() + a = abs(np.random.rand(10).astype(np.float64)) a_gpu = gpuarray.to_gpu(a) result = (scalar ** a_gpu).get() - assert (np.abs(scalar ** a 
- result) < 1e-3).all() + assert np.testing.assert_allclose(scalar ** a, result) result = (a_gpu ** a_gpu).get() - assert (np.abs(a ** a - result) < 1e-3).all() + assert np.testing.assert_allclose(a ** a, result) result = (a_gpu ** scalar).get() - assert (np.abs(a ** scalar - result) < 1e-3).all() + assert np.testing.assert_allclose(a ** scalar, result) @mark_cuda_test def test_numpy_integer_shape(self): -- GitLab From 978c30e8accf5f6f60eff17d401399fce5f52a2e Mon Sep 17 00:00:00 2001 From: Mit Kotak Date: Tue, 23 Nov 2021 02:04:00 +0000 Subject: [PATCH 27/28] Removed AssertionError: assert None --- test/test_gpuarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 7ecd01f6..51e118dc 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -47,13 +47,13 @@ class TestGPUArray: a_gpu = gpuarray.to_gpu(a) result = (scalar ** a_gpu).get() - assert np.testing.assert_allclose(scalar ** a, result) + np.testing.assert_allclose(scalar ** a, result) result = (a_gpu ** a_gpu).get() - assert np.testing.assert_allclose(a ** a, result) + np.testing.assert_allclose(a ** a, result) result = (a_gpu ** scalar).get() - assert np.testing.assert_allclose(a ** scalar, result) + np.testing.assert_allclose(a ** scalar, result) @mark_cuda_test def test_numpy_integer_shape(self): -- GitLab From 6f6bd9b606f540200047356cd6cccedd77fd72e2 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 23 Nov 2021 03:11:32 +0000 Subject: [PATCH 28/28] removes unnecessary type cast --- test/test_gpuarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 51e118dc..5234d8a2 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -43,7 +43,7 @@ class TestGPUArray: @mark_cuda_test def test_rpow_array(self): scalar = np.random.rand() - a = abs(np.random.rand(10).astype(np.float64)) + a = abs(np.random.rand(10)) a_gpu = gpuarray.to_gpu(a) 
result = (scalar ** a_gpu).get() -- GitLab