Support host scalars and bool dtypes in gpuarray.minimum/maximum.

* pycuda/compiler.py: nest the os.path.join call that builds the "cuda"
  include path.
* pycuda/elementwise.py: do not append the "f" suffix to the min/max function
  name when either input dtype is bool.
* pycuda/gpuarray.py: when both operands are scalars, evaluate np.f<which> on
  the host and upload the 0-d result, keeping the dtype NumPy produced.
* test/test_gpuarray.py: cover scalar operands, checking value and dtype.

diff --git a/pycuda/compiler.py b/pycuda/compiler.py
index 96aa1b19e1b84a16854feb197731cebf6ffc2b69..369b6375774677965437d7f0d33c815d37f58a0b 100644
--- a/pycuda/compiler.py
+++ b/pycuda/compiler.py
@@ -211,7 +211,7 @@ def _find_pycuda_include_path():
 
     return os.path.abspath(
             os.path.join(importlib.util.find_spec("pycuda").origin,
-                os.path.pardir, "cuda"))
+                os.path.join(os.path.pardir, "cuda")))
 
 
 DEFAULT_NVCC_FLAGS = [
diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index 7d633011ce727f549b5a7ce929a0f5594de2ad3a..83fd779a19d79fc375c387e8852359a851bc8683 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -565,7 +565,7 @@ def get_binary_func_scalar_kernel(func, dtype_x, dtype_y, dtype_z):
 
 
 def get_binary_minmax_kernel(func, dtype_x, dtype_y, dtype_z, use_scalar):
-    if np.float64 not in [dtype_x, dtype_y]:
+    if (np.float64 not in [dtype_x, dtype_y]) and (bool not in [dtype_x, dtype_y]):
         func = func + "f"
 
     if any(dt.kind == "f" for dt in [dtype_x, dtype_y, dtype_z]):
diff --git a/pycuda/gpuarray.py b/pycuda/gpuarray.py
index 100d21cc7a75325d93295bdc8a89d0bf3e439b01..893a802e97ec4f8e35036060d8f5bc4342ba16f9 100644
--- a/pycuda/gpuarray.py
+++ b/pycuda/gpuarray.py
@@ -2019,7 +2019,16 @@ def where(criterion, then_, else_, out=None, stream=None):
 
 def _make_binary_minmax_func(which):
     def f(a, b, out=None, stream=None):
-        if isinstance(a, GPUArray) and isinstance(b, GPUArray):
+        if np.isscalar(a) and np.isscalar(b):
+            # Both operands are host scalars, so there is no device data to
+            # run a kernel on: evaluate np.f<which> on the host and upload the
+            # 0-d result, keeping the dtype NumPy produced.
+            result = to_gpu(np.array(getattr(np, "f" + which)(a, b)))
+            if out is None:
+                out = result
+            else:
+                out[...] = result
+        elif isinstance(a, GPUArray) and isinstance(b, GPUArray):
             if out is None:
                 out = empty_like(a)
             func = elementwise.get_binary_minmax_kernel(
diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py
index 7091fee9481c52f4109ea1085a0d2cb0fa9765dc..62791e7d9d7d52693b50f944b634926040180214 100644
--- a/test/test_gpuarray.py
+++ b/test/test_gpuarray.py
@@ -11,6 +11,7 @@ import pycuda.gpuarray as gpuarray
 import pycuda.driver as drv
 from pycuda.compiler import SourceModule
 import pytest
+import itertools
 
 
 @pytest.fixture(autouse=True)
@@ -809,6 +810,18 @@ class TestGPUArray:
         assert la.norm(max_a_b_gpu.get() - np.maximum(a, b)) == 0
         assert la.norm(min_a_b_gpu.get() - np.minimum(a, b)) == 0
 
+    @pytest.mark.parametrize("func", ["minimum", "maximum"])
+    def test_min_max_elemwise_on_scalars(self, func):
+        """Scalar-only minimum/maximum must agree with NumPy in value and dtype."""
+        bool_pairs = itertools.product([False, True], repeat=2)
+        number_pairs = [(3, 7), (2.5, -1.0)]
+
+        for a, b in itertools.chain(bool_pairs, number_pairs):
+            result_ref = getattr(np, func)(a, b)
+            result = getattr(gpuarray, func)(a, b)
+            assert result.dtype == result_ref.dtype
+            np.testing.assert_array_equal(result.get(), result_ref)
+
     def test_take_put(self):
         for n in [5, 17, 333]:
             one_field_size = 8