diff --git a/doc/array.rst b/doc/array.rst
index 9b286a2e1396dcb8b803964a2abbb52914cb5290..adbd9cfb08bf9a9f6f3db46c15efc5d986d3d237 100644
--- a/doc/array.rst
+++ b/doc/array.rst
@@ -356,6 +356,18 @@ Constructing :class:`GPUArray` Instances
 
     Join a sequence of arrays along a new axis.
 
+.. function:: logical_and(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical AND of *x1* and *x2*.
+
+.. function:: logical_or(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical OR of *x1* and *x2*.
+
+.. function:: logical_not(x, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical NOT of *x*.
+
 Conditionals
 ^^^^^^^^^^^^
 
diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index fac12221b1bf7cb9c0020585dfe19858f6873fee..629c440608ac7c48a3026d3e1c635aa3c5564dce 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -755,14 +755,26 @@ def get_where_kernel(crit_dtype, dtype):
 
 
 @context_dependent_memoize
-def get_scalar_op_kernel(dtype_x, dtype_y, operator):
+def get_scalar_op_kernel(dtype_x, dtype_a, dtype_y, operator):
     return get_elwise_kernel(
         "%(tp_x)s *x, %(tp_a)s a, %(tp_y)s *y"
         % {
             "tp_x": dtype_to_ctype(dtype_x),
+            "tp_a": dtype_to_ctype(dtype_a),
             "tp_y": dtype_to_ctype(dtype_y),
-            "tp_a": dtype_to_ctype(dtype_x),
         },
         "y[i] = x[i] %s a" % operator,
         "scalarop_kernel",
     )
+
+
+@context_dependent_memoize
+def get_logical_not_kernel(dtype_x, dtype_out):
+    return get_elwise_kernel(
+        [
+            VectorArg(dtype_x, "x"),
+            VectorArg(dtype_out, "out"),
+        ],
+        "out[i] = (x[i] == 0)",
+        "logical_not",
+    )
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index b4d2e6cdf44d754621fda1d079b3f768d0642dc7..420e5d7ad96e6eca54cdfeda25c1d21e21391146 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -168,7 +168,9 @@ def _make_binary_op(operator):
             return result
         else:  # scalar operator
             result = self._new_like_me()
-            func = elementwise.get_scalar_op_kernel(self.dtype, result.dtype, operator)
+            func = elementwise.get_scalar_op_kernel(self.dtype,
+                                                    np.dtype(type(other)),
+                                                    result.dtype, operator)
             func.prepared_async_call(
                 self._grid,
                 self._block,
@@ -2080,4 +2082,99 @@ subset_max = _make_subset_minmax_kernel("max")
 
 # }}}
 
+
+# {{{ logical ops
+
+def _logical_op(x1, x2, out, allocator, operator):
+    assert operator in ["&&", "||"]
+    allocator = (
+        allocator
+        or getattr(x1, "allocator", None)
+        or getattr(x2, "allocator", None)
+        or drv.mem_alloc)
+
+    if np.isscalar(x1) and np.isscalar(x2):
+        if out is None:
+            out = empty(shape=(), dtype=np.bool_, allocator=allocator)
+
+        if operator == "&&":
+            out[:] = np.logical_and(x1, x2)
+        else:
+            out[:] = np.logical_or(x1, x2)
+    elif np.isscalar(x1) or np.isscalar(x2):
+        scalar_arg, = [x for x in (x1, x2) if np.isscalar(x)]
+        ary_arg, = [x for x in (x1, x2) if not np.isscalar(x)]
+        if not isinstance(ary_arg, GPUArray):
+            raise ValueError("logical ops can only take scalars"
+                             " or GPUArrays as inputs")
+
+        if out is None:
+            out = ary_arg._new_like_me(dtype=np.bool_)
+
+        assert out.shape == ary_arg.shape and out.dtype == np.bool_
+
+        func = elementwise.get_scalar_op_kernel(ary_arg.dtype,
+                                                np.dtype(type(scalar_arg)),
+                                                out.dtype,
+                                                operator)
+
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 ary_arg.gpudata,
+                                 scalar_arg,
+                                 out.gpudata,
+                                 out.mem_size)
+    else:
+        if not (isinstance(x1, GPUArray) and isinstance(x2, GPUArray)):
+            raise ValueError("logical ops can only take scalars"
+                             " or GPUArrays as inputs")
+        if x1.shape != x2.shape:
+            raise NotImplementedError("Broadcasting not supported")
+
+        if out is None:
+            out = x1._new_like_me(dtype=np.bool_)
+
+        assert out.shape == x1.shape and out.dtype == np.bool_
+
+        func = elementwise.get_binary_op_kernel(
+            x1.dtype, x2.dtype, out.dtype, operator
+        )
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x1.gpudata,
+                                 x2.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+
+def logical_and(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "&&")
+
+
+def logical_or(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "||")
+
+
+def logical_not(x, /, out=None, *, allocator=None):
+    allocator = allocator or getattr(x, "allocator", None) or drv.mem_alloc
+    if np.isscalar(x):
+        if out is None:
+            out = empty(shape=(), dtype=np.bool_, allocator=allocator)
+        out[:] = np.logical_not(x)
+    else:
+        if out is None:
+            out = empty(shape=x.shape, dtype=np.bool_, allocator=allocator)
+        func = elementwise.get_logical_not_kernel(x.dtype, out.dtype)
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+# }}}
+
 # vim: foldmethod=marker
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index a97ed463c9e67123f6ae18d7dcc52ab8e5df75f5..1d618e1ddde85c93f38be00092f536e8d96cb0dd 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -1322,6 +1322,50 @@ class TestGPUArray:
         assert new_z.dtype == np.complex64
         assert new_z.shape == arr.shape
 
+    def test_logical_and_or(self):
+        rng = np.random.default_rng(seed=0)
+        for op in ["logical_and", "logical_or"]:
+            x_np = rng.random((10, 4))
+            y_np = rng.random((10, 4))
+            zeros_np = np.zeros((10, 4))
+            ones_np = np.ones((10, 4))
+
+            x_cu = gpuarray.to_gpu(x_np)
+            y_cu = gpuarray.to_gpu(y_np)
+            zeros_cu = gpuarray.zeros((10, 4), "float64")
+            ones_cu = gpuarray.ones((10, 4), "float64")
+
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, y_cu).get(),
+                getattr(np, op)(x_np, y_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, ones_cu).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, zeros_cu).get(),
+                getattr(np, op)(x_np, zeros_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 1.0).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 0.0).get(),
+                getattr(np, op)(x_np, zeros_np))
+
+    def test_logical_not(self):
+        rng = np.random.default_rng(seed=0)
+        x_np = rng.random((10, 4))
+        x_cu = gpuarray.to_gpu(x_np)
+
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(x_cu).get(),
+            np.logical_not(x_np))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.zeros(10, "float64")).get(),
+            np.logical_not(np.zeros(10)))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.ones(10, "float64")).get(),
+            np.logical_not(np.ones(10)))
+
 
 if __name__ == "__main__":
     # make sure that import failures get reported, instead of skipping the tests.
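
Reviewer note: a minimal usage sketch of the API added by this patch, assuming a
working CUDA context (here obtained via pycuda.autoinit). This snippet is
illustrative only and not part of the diff:

    import numpy as np
    import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
    import pycuda.gpuarray as gpuarray

    x = gpuarray.to_gpu(np.array([0.0, 1.0, 2.0]))
    y = gpuarray.to_gpu(np.array([1.0, 0.0, 2.0]))

    # The new functions return bool arrays; scalar operands are broadcast
    # against the array operand (full array-array broadcasting is not
    # supported by this patch).
    print(gpuarray.logical_and(x, y).get())   # [False False  True]
    print(gpuarray.logical_or(x, 0.0).get())  # [False  True  True]
    print(gpuarray.logical_not(x).get())      # [ True False False]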