From 23a7cdfd5a742d2e9c2d4608297553c448bd2075 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 17:44:52 -0500
Subject: [PATCH 1/3] get_scalar_op_kernel: take in scalar's dtype

---
 pycuda/elementwise.py | 4 ++--
 pycuda/gpuarray.py    | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index fac12221..7fcc956d 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -755,13 +755,13 @@ def get_where_kernel(crit_dtype, dtype):
 
 
 @context_dependent_memoize
-def get_scalar_op_kernel(dtype_x, dtype_y, operator):
+def get_scalar_op_kernel(dtype_x, dtype_a, dtype_y, operator):
     return get_elwise_kernel(
         "%(tp_x)s *x, %(tp_a)s a, %(tp_y)s *y"
         % {
             "tp_x": dtype_to_ctype(dtype_x),
+            "tp_a": dtype_to_ctype(dtype_a),
             "tp_y": dtype_to_ctype(dtype_y),
-            "tp_a": dtype_to_ctype(dtype_x),
         },
         "y[i] = x[i] %s a" % operator,
         "scalarop_kernel",
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index b4d2e6cd..57163e3b 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -168,7 +168,9 @@ def _make_binary_op(operator):
             return result
         else:  # scalar operator
             result = self._new_like_me()
-            func = elementwise.get_scalar_op_kernel(self.dtype, result.dtype, operator)
+            func = elementwise.get_scalar_op_kernel(self.dtype,
+                                                    np.dtype(type(other)),
+                                                    result.dtype, operator)
             func.prepared_async_call(
                 self._grid,
                 self._block,
-- 
GitLab
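Patch 1 above is groundwork for the logical operations added in patch 2:
get_scalar_op_kernel now receives the scalar's dtype explicitly instead of
reusing the array's dtype, so the scalar argument of the generated
"y[i] = x[i] <op> a" kernel no longer has to share the array's type (patch 2
relies on this when it passes np.dtype(type(scalar_arg))). A minimal sketch of
how the updated helper is driven, mirroring the prepared_async_call pattern
used in the patch; this is illustration only, not part of the patch, and
assumes pycuda.autoinit has created a CUDA context:

    import numpy as np
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.gpuarray as gpuarray
    from pycuda import elementwise

    x = gpuarray.to_gpu(np.array([0.0, 1.5, 0.0, 2.0]))
    out = x._new_like_me(dtype=np.bool_)

    # array dtype, scalar dtype, result dtype, operator string
    knl = elementwise.get_scalar_op_kernel(
        x.dtype, np.dtype(type(0.0)), out.dtype, "||")
    knl.prepared_async_call(out._grid, out._block, None,
                            x.gpudata, 0.0, out.gpudata, out.mem_size)
    # out.get() -> [False, True, False, True]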
From 3316fcab8ba944f6d453acd2d3897946a0f71a4f Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 17:52:38 -0500
Subject: [PATCH 2/3] implements logical operations

---
 doc/array.rst         | 12 ++++++
 pycuda/elementwise.py | 12 ++++++
 pycuda/gpuarray.py    | 91 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)

diff --git a/doc/array.rst b/doc/array.rst
index 9b286a2e..adbd9cfb 100644
--- a/doc/array.rst
+++ b/doc/array.rst
@@ -356,6 +356,18 @@ Constructing :class:`GPUArray` Instances
 
     Join a sequence of arrays along a new axis.
 
+.. function:: logical_and(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical AND of *x1* and *x2*.
+
+.. function:: logical_or(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical OR of *x1* and *x2*.
+
+.. function:: logical_not(x, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical NOT of *x*.
+
 Conditionals
 ^^^^^^^^^^^^
 
diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index 7fcc956d..629c4406 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -766,3 +766,15 @@ def get_scalar_op_kernel(dtype_x, dtype_a, dtype_y, operator):
         "y[i] = x[i] %s a" % operator,
         "scalarop_kernel",
     )
+
+
+@context_dependent_memoize
+def get_logical_not_kernel(dtype_x, dtype_out):
+    return get_elwise_kernel(
+        [
+            VectorArg(dtype_x, "x"),
+            VectorArg(dtype_out, "out"),
+        ],
+        "out[i] = (x[i] == 0)",
+        "logical_not",
+    )
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 57163e3b..420e5d7a 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -2082,4 +2082,95 @@ subset_max = _make_subset_minmax_kernel("max")
 
 # }}}
 
+
+# {{{ logical ops
+
+def _logical_op(x1, x2, out, allocator, operator):
+    assert operator in ["&&", "||"]
+    allocator = (
+        allocator
+        or getattr(x1, "allocator", None)
+        or getattr(x2, "allocator", None)
+        or drv.mem_alloc)
+
+    if np.isscalar(x1) and np.isscalar(x2):
+        if out is None:
+            out = empty(shape=(), dtype=np.bool_, allocator=allocator)
+
+        if operator == "&&":
+            out[:] = np.logical_and(x1, x2)
+        else:
+            out[:] = np.logical_or(x1, x2)
+    elif np.isscalar(x1) or np.isscalar(x2):
+        scalar_arg, = [x for x in (x1, x2) if np.isscalar(x)]
+        ary_arg, = [x for x in (x1, x2) if not np.isscalar(x)]
+        if not isinstance(ary_arg, GPUArray):
+            raise ValueError("logical ops take either scalars or GPUArrays"
+                             " as inputs")
+
+        out = out or ary_arg._new_like_me(dtype=np.bool_)
+
+        assert out.shape == ary_arg.shape and out.dtype == np.bool_
+
+        func = elementwise.get_scalar_op_kernel(ary_arg.dtype,
+                                                np.dtype(type(scalar_arg)),
+                                                out.dtype,
+                                                operator)
+
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 ary_arg.gpudata,
+                                 scalar_arg,
+                                 out.gpudata,
+                                 out.mem_size)
+    else:
+        if not (isinstance(x1, GPUArray) and isinstance(x2, GPUArray)):
+            raise ValueError("logical ops take either scalars or GPUArrays"
+                             " as inputs")
+        if x1.shape != x2.shape:
+            raise NotImplementedError("Broadcasting not supported")
+
+        if out is None:
+            out = x1._new_like_me(dtype=np.bool_)
+
+        assert out.shape == x1.shape and out.dtype == np.bool_
+
+        func = elementwise.get_binary_op_kernel(
+            x1.dtype, x2.dtype, out.dtype, operator
+        )
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x1.gpudata,
+                                 x2.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+
+def logical_and(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "&&")
+
+
+def logical_or(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "||")
+
+
+def logical_not(x, /, out=None, *, allocator=drv.mem_alloc):
+    if np.isscalar(x):
+        out = out or empty(shape=(), dtype=np.bool_, allocator=allocator)
+        out[:] = np.logical_not(x)
+    else:
+        out = out or empty(shape=x.shape, dtype=np.bool_, allocator=allocator)
+        func = elementwise.get_logical_not_kernel(x.dtype, out.dtype)
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+# }}}
+
 # vim: foldmethod=marker
-- 
GitLab
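The new functions mirror numpy.logical_and/logical_or/logical_not: any nonzero
element counts as true and the result dtype is bool. A short usage sketch,
illustration only and not part of the patch, assuming pycuda.autoinit has
created a CUDA context:

    import numpy as np
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.gpuarray as gpuarray

    x = gpuarray.to_gpu(np.array([0.0, 1.5, 0.0, 2.0]))
    y = gpuarray.to_gpu(np.array([1.0, 0.0, 0.0, 3.0]))

    gpuarray.logical_and(x, y).get()   # -> [False, False, False,  True]
    gpuarray.logical_or(x, 0.0).get()  # -> [False,  True, False,  True]
    gpuarray.logical_not(x).get()      # -> [ True, False,  True, False]

Note that both array operands must already have matching shapes; unlike numpy,
array broadcasting is not supported and raises NotImplementedError, while
mixing an array with a scalar is handled by the scalar-op kernel path.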
From f050a7777f9440478305491e3961e9172982d0a6 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 18:15:39 -0500
Subject: [PATCH 3/3] tests logical operators

---
 test/test_gpuarray.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index a97ed463..1d618e1d 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -1322,6 +1322,50 @@ class TestGPUArray:
         assert new_z.dtype == np.complex64
         assert new_z.shape == arr.shape
+
+    def test_logical_and_or(self):
+        rng = np.random.default_rng(seed=0)
+        for op in ["logical_and", "logical_or"]:
+            x_np = rng.random((10, 4))
+            y_np = rng.random((10, 4))
+            zeros_np = np.zeros((10, 4))
+            ones_np = np.ones((10, 4))
+
+            x_cu = gpuarray.to_gpu(x_np)
+            y_cu = gpuarray.to_gpu(y_np)
+            zeros_cu = gpuarray.zeros((10, 4), "float64")
+            ones_cu = gpuarray.ones((10, 4))
+
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, y_cu).get(),
+                getattr(np, op)(x_np, y_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, ones_cu).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, zeros_cu).get(),
+                getattr(np, op)(x_np, zeros_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 1.0).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 0.0).get(),
+                getattr(np, op)(x_np, 0.0))
+
+    def test_logical_not(self):
+        rng = np.random.default_rng(seed=0)
+        x_np = rng.random((10, 4))
+        x_cu = gpuarray.to_gpu(x_np)
+
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(x_cu).get(),
+            np.logical_not(x_np))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.zeros(10, "float64")).get(),
+            np.logical_not(np.zeros(10)))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.ones(10)).get(),
+            np.logical_not(np.ones(10)))
 
 
 if __name__ == "__main__":
    # make sure that import failures get reported, instead of skipping the tests.
-- 
GitLab
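The new tests follow the existing numpy-comparison style of test_gpuarray.py
and exercise the array/array and array/scalar paths against all-ones and
all-zeros inputs. On a machine with a working CUDA device they can be run
with, for example:

    python -m pytest test/test_gpuarray.py -k "logical"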