From 23a7cdfd5a742d2e9c2d4608297553c448bd2075 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 17:44:52 -0500
Subject: [PATCH 1/3] get_scalar_op_kernel: take in scalar's dtype

---
 pycuda/elementwise.py | 4 ++--
 pycuda/gpuarray.py    | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index fac12221..7fcc956d 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -755,13 +755,13 @@ def get_where_kernel(crit_dtype, dtype):
 
 
 @context_dependent_memoize
-def get_scalar_op_kernel(dtype_x, dtype_y, operator):
+def get_scalar_op_kernel(dtype_x, dtype_a, dtype_y, operator):
     return get_elwise_kernel(
         "%(tp_x)s *x, %(tp_a)s a, %(tp_y)s *y"
         % {
             "tp_x": dtype_to_ctype(dtype_x),
+            "tp_a": dtype_to_ctype(dtype_a),
             "tp_y": dtype_to_ctype(dtype_y),
-            "tp_a": dtype_to_ctype(dtype_x),
         },
         "y[i] = x[i] %s a" % operator,
         "scalarop_kernel",
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index b4d2e6cd..57163e3b 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -168,7 +168,9 @@ def _make_binary_op(operator):
             return result
         else:  # scalar operator
             result = self._new_like_me()
-            func = elementwise.get_scalar_op_kernel(self.dtype, result.dtype, operator)
+            func = elementwise.get_scalar_op_kernel(self.dtype,
+                                                    np.dtype(type(other)),
+                                                    result.dtype, operator)
             func.prepared_async_call(
                 self._grid,
                 self._block,
-- 
GitLab
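Patch 1 above is groundwork for the logical operations added in patch 2:
get_scalar_op_kernel now receives the scalar's dtype explicitly instead of
reusing the array's dtype, so the scalar argument of the generated
"y[i] = x[i] <op> a" kernel no longer has to share the array's type (patch 2
relies on this when it passes np.dtype(type(scalar_arg))). A minimal sketch of
how the updated helper is driven, mirroring the prepared_async_call pattern
used in the patch; this is illustration only, not part of the patch, and
assumes pycuda.autoinit has created a CUDA context:

    import numpy as np
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.gpuarray as gpuarray
    from pycuda import elementwise

    x = gpuarray.to_gpu(np.array([0.0, 1.5, 0.0, 2.0]))
    out = x._new_like_me(dtype=np.bool_)

    # array dtype, scalar dtype, result dtype, operator string
    knl = elementwise.get_scalar_op_kernel(
        x.dtype, np.dtype(type(0.0)), out.dtype, "||")
    knl.prepared_async_call(out._grid, out._block, None,
                            x.gpudata, 0.0, out.gpudata, out.mem_size)
    # out.get() -> [False, True, False, True]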
From 3316fcab8ba944f6d453acd2d3897946a0f71a4f Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 17:52:38 -0500
Subject: [PATCH 2/3] implements logical operations

---
 doc/array.rst         | 12 ++++++
 pycuda/elementwise.py | 12 ++++++
 pycuda/gpuarray.py    | 91 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)

diff --git a/doc/array.rst b/doc/array.rst
index 9b286a2e..adbd9cfb 100644
--- a/doc/array.rst
+++ b/doc/array.rst
@@ -356,6 +356,18 @@ Constructing :class:`GPUArray` Instances
 
     Join a sequence of arrays along a new axis.
 
+.. function:: logical_and(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical AND of *x1* and *x2*.
+
+.. function:: logical_or(x1, x2, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical OR of *x1* and *x2*.
+
+.. function:: logical_not(x, /, out=None, *, allocator=None)
+
+    Returns the elementwise logical NOT of *x*.
+
 Conditionals
 ^^^^^^^^^^^^
 
diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index 7fcc956d..629c4406 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -766,3 +766,15 @@ def get_scalar_op_kernel(dtype_x, dtype_a, dtype_y, operator):
         "y[i] = x[i] %s a" % operator,
         "scalarop_kernel",
     )
+
+
+@context_dependent_memoize
+def get_logical_not_kernel(dtype_x, dtype_out):
+    return get_elwise_kernel(
+        [
+            VectorArg(dtype_x, "x"),
+            VectorArg(dtype_out, "out"),
+        ],
+        "out[i] = (x[i] == 0)",
+        "logical_not",
+    )
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 57163e3b..420e5d7a 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -2082,4 +2082,95 @@ subset_max = _make_subset_minmax_kernel("max")
 
 # }}}
 
+
+# {{{ logical ops
+
+def _logical_op(x1, x2, out, allocator, operator):
+    assert operator in ["&&", "||"]
+    allocator = (
+        allocator
+        or getattr(x1, "allocator", None)
+        or getattr(x2, "allocator", None)
+        or drv.mem_alloc)
+
+    if np.isscalar(x1) and np.isscalar(x2):
+        if out is None:
+            out = empty(shape=(), dtype=np.bool_, allocator=allocator)
+
+        if operator == "&&":
+            out[:] = np.logical_and(x1, x2)
+        else:
+            out[:] = np.logical_or(x1, x2)
+    elif np.isscalar(x1) or np.isscalar(x2):
+        scalar_arg, = [x for x in (x1, x2) if np.isscalar(x)]
+        ary_arg, = [x for x in (x1, x2) if not np.isscalar(x)]
+        if not isinstance(ary_arg, GPUArray):
+            raise ValueError("logical ops take either scalars or GPUArrays"
+                             " as inputs")
+
+        out = out or ary_arg._new_like_me(dtype=np.bool_)
+
+        assert out.shape == ary_arg.shape and out.dtype == np.bool_
+
+        func = elementwise.get_scalar_op_kernel(ary_arg.dtype,
+                                                np.dtype(type(scalar_arg)),
+                                                out.dtype,
+                                                operator)
+
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 ary_arg.gpudata,
+                                 scalar_arg,
+                                 out.gpudata,
+                                 out.mem_size)
+    else:
+        if not (isinstance(x1, GPUArray) and isinstance(x2, GPUArray)):
+            raise ValueError("logical ops take either scalars or GPUArrays"
+                             " as inputs")
+        if x1.shape != x2.shape:
+            raise NotImplementedError("Broadcasting not supported")
+
+        if out is None:
+            out = x1._new_like_me(dtype=np.bool_)
+
+        assert out.shape == x1.shape and out.dtype == np.bool_
+
+        func = elementwise.get_binary_op_kernel(
+            x1.dtype, x2.dtype, out.dtype, operator
+        )
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x1.gpudata,
+                                 x2.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+
+def logical_and(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "&&")
+
+
+def logical_or(x1, x2, /, out=None, *, allocator=None):
+    return _logical_op(x1, x2, out, allocator, "||")
+
+
+def logical_not(x, /, out=None, *, allocator=drv.mem_alloc):
+    if np.isscalar(x):
+        out = out or empty(shape=(), dtype=np.bool_, allocator=allocator)
+        out[:] = np.logical_not(x)
+    else:
+        out = out or empty(shape=x.shape, dtype=np.bool_, allocator=allocator)
+        func = elementwise.get_logical_not_kernel(x.dtype, out.dtype)
+        func.prepared_async_call(out._grid, out._block,
+                                 None,
+                                 x.gpudata,
+                                 out.gpudata,
+                                 out.mem_size)
+
+    return out
+
+# }}}
+
 # vim: foldmethod=marker
-- 
GitLab
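The new functions mirror numpy.logical_and/logical_or/logical_not: any nonzero
element counts as true and the result dtype is bool. A short usage sketch,
illustration only and not part of the patch, assuming pycuda.autoinit has
created a CUDA context:

    import numpy as np
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.gpuarray as gpuarray

    x = gpuarray.to_gpu(np.array([0.0, 1.5, 0.0, 2.0]))
    y = gpuarray.to_gpu(np.array([1.0, 0.0, 0.0, 3.0]))

    gpuarray.logical_and(x, y).get()   # -> [False, False, False,  True]
    gpuarray.logical_or(x, 0.0).get()  # -> [False,  True, False,  True]
    gpuarray.logical_not(x).get()      # -> [ True, False,  True, False]

Note that both array operands must already have matching shapes; unlike numpy,
array broadcasting is not supported and raises NotImplementedError, while
mixing an array with a scalar is handled by the scalar-op kernel path.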
From f050a7777f9440478305491e3961e9172982d0a6 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Sun, 31 Jul 2022 18:15:39 -0500
Subject: [PATCH 3/3] tests logical operators

---
 test/test_gpuarray.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index a97ed463..1d618e1d 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -1322,6 +1322,50 @@ class TestGPUArray:
         assert new_z.dtype == np.complex64
         assert new_z.shape == arr.shape
+
+    def test_logical_and_or(self):
+        rng = np.random.default_rng(seed=0)
+        for op in ["logical_and", "logical_or"]:
+            x_np = rng.random((10, 4))
+            y_np = rng.random((10, 4))
+            zeros_np = np.zeros((10, 4))
+            ones_np = np.ones((10, 4))
+
+            x_cu = gpuarray.to_gpu(x_np)
+            y_cu = gpuarray.to_gpu(y_np)
+            zeros_cu = gpuarray.zeros((10, 4), "float64")
+            ones_cu = gpuarray.ones((10, 4))
+
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, y_cu).get(),
+                getattr(np, op)(x_np, y_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, ones_cu).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, zeros_cu).get(),
+                getattr(np, op)(x_np, zeros_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 1.0).get(),
+                getattr(np, op)(x_np, ones_np))
+            np.testing.assert_array_equal(
+                getattr(gpuarray, op)(x_cu, 0.0).get(),
+                getattr(np, op)(x_np, 0.0))
+
+    def test_logical_not(self):
+        rng = np.random.default_rng(seed=0)
+        x_np = rng.random((10, 4))
+        x_cu = gpuarray.to_gpu(x_np)
+
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(x_cu).get(),
+            np.logical_not(x_np))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.zeros(10, "float64")).get(),
+            np.logical_not(np.zeros(10)))
+        np.testing.assert_array_equal(
+            gpuarray.logical_not(gpuarray.ones(10)).get(),
+            np.logical_not(np.ones(10)))
 
 
 if __name__ == "__main__":
    # make sure that import failures get reported, instead of skipping the tests.
-- 
GitLab
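The new tests follow the existing numpy-comparison style of test_gpuarray.py
and exercise the array/array and array/scalar paths against all-ones and
all-zeros inputs. On a machine with a working CUDA device they can be run
with, for example:

    python -m pytest test/test_gpuarray.py -k "logical"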