From 1dcaf262964fda21e362a10d064663df169d50a2 Mon Sep 17 00:00:00 2001
From: Kaushik Kulkarni
Date: Fri, 26 Aug 2022 16:30:21 -0500
Subject: [PATCH 1/2] handle real casting logic for complex arith ops

---
 pycuda/elementwise.py | 52 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/pycuda/elementwise.py b/pycuda/elementwise.py
index 7d633011..5486d097 100644
--- a/pycuda/elementwise.py
+++ b/pycuda/elementwise.py
@@ -27,6 +27,7 @@ OTHER DEALINGS IN THE SOFTWARE.
 
 from pycuda.tools import context_dependent_memoize
+from typing import Any
 import numpy as np
 from pycuda.tools import dtype_to_ctype, VectorArg, ScalarArg
 from pytools import memoize_method
 
@@ -462,6 +463,11 @@ def get_linear_combination_kernel(summand_descriptors, dtype_z):
     return func, tex_src
 
 
+def _get_real_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]:
+    assert dtype.kind == "c"
+    return np.empty(0, dtype).real.dtype
+
+
 @context_dependent_memoize
 def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z,
                       x_is_scalar=False, y_is_scalar=False):
@@ -472,10 +478,29 @@ def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z,
     :arg y_is_scalar: A :class:`bool` which is *True* only if `y` is a scalar :class:`gpuarray`.
     """
     out_t = dtype_to_ctype(dtype_z)
+
+    # {{{ cast real scalars in context of complex scalar arithmetic
+
+    if dtype_z.kind == "c" and dtype_x.kind != "c":
+        dtype_z_real = _get_real_dtype(dtype_z)
+        x_t = dtype_to_ctype(dtype_z_real)
+    else:
+        x_t = out_t
+
+    if dtype_z.kind == "c" and dtype_y.kind != "c":
+        dtype_z_real = _get_real_dtype(dtype_z)
+        y_t = dtype_to_ctype(dtype_z_real)
+    else:
+        y_t = out_t
+
+    # }}}
+
     x = "x[0]" if x_is_scalar else "x[i]"
-    ax = f"a*(({out_t}) {x})"
+    a = f"({x_t}) a"
+    ax = f"{a}*(({x_t}) {x})"
     y = "y[0]" if y_is_scalar else "y[i]"
-    by = f"b*(({out_t}) {y})"
+    b = f"({y_t}) b"
+    by = f"({b})*(({y_t}) {y})"
     result = f"{ax} + {by}"
     return get_elwise_kernel(
             "%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z"
@@ -508,8 +533,29 @@ def get_binary_op_kernel(dtype_x, dtype_y, dtype_z, operator,
     :arg x_is_scalar: A :class:`bool` which is *True* only if `x` is a scalar :class:`gpuarray`.
     :arg y_is_scalar: A :class:`bool` which is *True* only if `y` is a scalar :class:`gpuarray`.
     """
+
+    out_t = dtype_to_ctype(dtype_z)
+
+    # {{{ cast real scalars in context of complex scalar arithmetic
+
+    if dtype_z.kind == "c" and dtype_x.kind != "c":
+        dtype_z_real = _get_real_dtype(dtype_z)
+        x_t = dtype_to_ctype(dtype_z_real)
+    else:
+        x_t = out_t
+
+    if dtype_z.kind == "c" and dtype_y.kind != "c":
+        dtype_z_real = _get_real_dtype(dtype_z)
+        y_t = dtype_to_ctype(dtype_z_real)
+    else:
+        y_t = out_t
+
+    # }}}
+
     x = "x[0]" if x_is_scalar else "x[i]"
+    x = f"({x_t}) {x}"
     y = "y[0]" if y_is_scalar else "y[i]"
+    y = f"({y_t}) {y}"
     result = f"{x} {operator} {y}"
     return get_elwise_kernel(
             "%(tp_x)s *x, %(tp_y)s *y, %(tp_z)s *z"
@@ -518,7 +564,7 @@
             "tp_x": dtype_to_ctype(dtype_x),
             "tp_y": dtype_to_ctype(dtype_y),
             "tp_z": dtype_to_ctype(dtype_z),
         },
-        f"z[i] = {result}",
+        f"z[i] = ({out_t}) {result}",
         "multiply",
    )
--
GitLab
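
Note on the patch above (illustration only, not part of the series): instead of
casting a real operand to the complex output type `out_t`, the patch routes it
through the complex type's real dtype. The helper `_get_real_dtype` derives that
dtype from numpy itself; the following snippet, runnable without a GPU, shows
what it returns:

import numpy as np

def _get_real_dtype(dtype):
    # mirrors the patched helper: dtype of the real component of a complex dtype
    assert dtype.kind == "c"
    return np.empty(0, dtype).real.dtype

assert _get_real_dtype(np.dtype(np.complex64)) == np.float32    # ctype "float"
assert _get_real_dtype(np.dtype(np.complex128)) == np.float64   # ctype "double"

So for dtype_z = complex64 and a real-typed x, the generated kernel casts x to
"float" (the real dtype) rather than to the complex out_t.
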
""" + + out_t = dtype_to_ctype(dtype_z) + + # {{{ cast real scalars in context of complex scalar arithmetic + + if dtype_z.kind == "c" and dtype_x.kind != "c": + dtype_z_real = _get_real_dtype(dtype_z) + x_t = dtype_to_ctype(dtype_z_real) + else: + x_t = out_t + + if dtype_z.kind == "c" and dtype_y.kind != "c": + dtype_z_real = _get_real_dtype(dtype_z) + y_t = dtype_to_ctype(dtype_z_real) + else: + y_t = out_t + + # }}} + x = "x[0]" if x_is_scalar else "x[i]" + x = f"({x_t}) {x}" y = "y[0]" if y_is_scalar else "y[i]" + y = f"({y_t}) {y}" result = f"{x} {operator} {y}" return get_elwise_kernel( "%(tp_x)s *x, %(tp_y)s *y, %(tp_z)s *z" @@ -518,7 +564,7 @@ def get_binary_op_kernel(dtype_x, dtype_y, dtype_z, operator, "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - f"z[i] = {result}", + f"z[i] = ({out_t}) {result}", "multiply", ) -- GitLab From f9c821bef0c8f6ea2dd5945600b8f485463dace3 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Fri, 26 Aug 2022 17:17:41 -0500 Subject: [PATCH 2/2] test unequal dtype arith ops (fixes gh-372) --- test/test_gpuarray.py | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/test_gpuarray.py b/test/test_gpuarray.py index 7091fee9..6f83f585 100644 --- a/test/test_gpuarray.py +++ b/test/test_gpuarray.py @@ -11,6 +11,7 @@ import pycuda.gpuarray as gpuarray import pycuda.driver as drv from pycuda.compiler import SourceModule import pytest +import operator @pytest.fixture(autouse=True) @@ -18,6 +19,23 @@ def init_cuda_context(): yield from init_cuda_context_fixture() +def get_random_array(rng, shape, dtype): + dtype = np.dtype(dtype) + + if dtype.kind == "f": + return rng.random(shape, dtype) + elif dtype.kind in "il": + return rng.integers(-42, 42, shape, dtype) + elif dtype.kind in "u": + return rng.integers(0, 42, shape, dtype) + elif dtype.kind == "c": + real_dtype = np.empty(0, dtype).real.dtype + return (dtype.type(1j) * get_random_array(rng, shape, real_dtype) + + get_random_array(rng, shape, real_dtype)) + else: + raise NotImplementedError(f"dtype = {dtype}") + + @pytest.mark.cuda class TestGPUArray: def test_pow_array(self): @@ -1434,6 +1452,33 @@ class TestGPUArray: np.testing.assert_allclose(cumath.log10(x_cu).get(), np.log10(x_np), rtol=rtol) + @pytest.mark.parametrize("ldtype", [np.int32, np.int64, + np.float32, np.float64, + np.complex64, np.complex128]) + @pytest.mark.parametrize("rdtype", [np.int32, np.int64, + np.float32, np.float64, + np.complex64, np.complex128]) + @pytest.mark.parametrize("op", [operator.add, operator.sub, operator.mul, + operator.truediv]) + def test_binary_ops_with_unequal_dtypes(self, ldtype, rdtype, op): + # See https://github.com/inducer/pycuda/issues/372 + if op == operator.truediv and {ldtype, rdtype} <= {np.int32, np.int64}: + pytest.xfail("Enable after" + " gitlab.tiker.net/inducer/pycuda/-/merge_requests/66" + "is merged.") + + rng = np.random.default_rng(0) + lop_np = get_random_array(rng, (10, 4), ldtype) + rop_np = get_random_array(rng, (10, 4), rdtype) + + expected_result = op(lop_np, rop_np) + result = op(gpuarray.to_gpu(lop_np), gpuarray.to_gpu(rop_np)).get() + + assert result.dtype == expected_result.dtype + assert result.shape == expected_result.shape + np.testing.assert_allclose(expected_result, result, + rtol=5e-5) + if __name__ == "__main__": # make sure that import failures get reported, instead of skipping the tests. -- GitLab