Skip to content
Commits on Source (6)
Subproject commit 3702fb119804dddde5eaf4a254822b891a947104
Subproject commit 381e9410e2b380ec44920ca10b4252506ab507d0
......@@ -38,6 +38,7 @@ def _add_cuda_libdir_to_dll_path():
nvcc_path = _search_on_path(["nvcc.exe"])
if nvcc_path is not None:
os.add_dll_directory(dirname(nvcc_path))
return
from warnings import warn
......
......@@ -27,6 +27,7 @@ OTHER DEALINGS IN THE SOFTWARE.
from pycuda.tools import context_dependent_memoize
from typing import Any
import numpy as np
from pycuda.tools import dtype_to_ctype, VectorArg, ScalarArg
from pytools import memoize_method
......@@ -462,6 +463,11 @@ def get_linear_combination_kernel(summand_descriptors, dtype_z):
return func, tex_src
def _get_real_dtype(dtype: "np.dtype[Any]") -> "np.dtype[Any]":
assert dtype.kind == "c"
return np.empty(0, dtype).real.dtype
@context_dependent_memoize
def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z,
x_is_scalar=False, y_is_scalar=False):
......@@ -472,10 +478,29 @@ def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z,
:arg y_is_scalar: A :class:`bool` which is *True* only if `y` is a scalar :class:`gpuarray`.
"""
out_t = dtype_to_ctype(dtype_z)
# {{{ cast real scalars in context of complex scalar arithmetic
if dtype_z.kind == "c" and dtype_x.kind != "c":
dtype_z_real = _get_real_dtype(dtype_z)
x_t = dtype_to_ctype(dtype_z_real)
else:
x_t = out_t
if dtype_z.kind == "c" and dtype_y.kind != "c":
dtype_z_real = _get_real_dtype(dtype_z)
y_t = dtype_to_ctype(dtype_z_real)
else:
y_t = out_t
# }}}
x = "x[0]" if x_is_scalar else "x[i]"
ax = f"a*(({out_t}) {x})"
a = f"({x_t}) a"
ax = f"{a}*(({x_t}) {x})"
y = "y[0]" if y_is_scalar else "y[i]"
by = f"b*(({out_t}) {y})"
b = f"({y_t}) b"
by = f"({b})*(({y_t}) {y})"
result = f"{ax} + {by}"
return get_elwise_kernel(
"%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z"
......@@ -508,8 +533,29 @@ def get_binary_op_kernel(dtype_x, dtype_y, dtype_z, operator,
:arg x_is_scalar: A :class:`bool` which is *True* only if `x` is a scalar :class:`gpuarray`.
:arg y_is_scalar: A :class:`bool` which is *True* only if `y` is a scalar :class:`gpuarray`.
"""
out_t = dtype_to_ctype(dtype_z)
# {{{ cast real scalars in context of complex scalar arithmetic
if dtype_z.kind == "c" and dtype_x.kind != "c":
dtype_z_real = _get_real_dtype(dtype_z)
x_t = dtype_to_ctype(dtype_z_real)
else:
x_t = out_t
if dtype_z.kind == "c" and dtype_y.kind != "c":
dtype_z_real = _get_real_dtype(dtype_z)
y_t = dtype_to_ctype(dtype_z_real)
else:
y_t = out_t
# }}}
x = "x[0]" if x_is_scalar else "x[i]"
x = f"({x_t}) {x}"
y = "y[0]" if y_is_scalar else "y[i]"
y = f"({y_t}) {y}"
result = f"{x} {operator} {y}"
return get_elwise_kernel(
"%(tp_x)s *x, %(tp_y)s *y, %(tp_z)s *z"
......@@ -518,7 +564,7 @@ def get_binary_op_kernel(dtype_x, dtype_y, dtype_z, operator,
"tp_y": dtype_to_ctype(dtype_y),
"tp_z": dtype_to_ctype(dtype_z),
},
f"z[i] = {result}",
f"z[i] = ({out_t}) {result}",
"multiply",
)
......
......@@ -11,6 +11,7 @@ import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import pytest
import operator
@pytest.fixture(autouse=True)
......@@ -18,6 +19,23 @@ def init_cuda_context():
yield from init_cuda_context_fixture()
def get_random_array(rng, shape, dtype):
    """Draw a random test array of the given *shape* and *dtype* from *rng*.

    Floats are uniform in ``[0, 1)``, signed ints in ``[-42, 42)``,
    unsigned ints in ``[0, 42)``; complex values combine two draws of
    the matching real dtype.
    """
    dtype = np.dtype(dtype)
    kind = dtype.kind
    if kind == "f":
        return rng.random(shape, dtype)
    if kind in "il":
        return rng.integers(-42, 42, shape, dtype)
    if kind in "u":
        return rng.integers(0, 42, shape, dtype)
    if kind == "c":
        # Derive the component dtype, then draw imaginary part first and
        # real part second (same rng call order as before).
        real_dtype = np.empty(0, dtype).real.dtype
        imag_part = dtype.type(1j) * get_random_array(rng, shape, real_dtype)
        return imag_part + get_random_array(rng, shape, real_dtype)
    raise NotImplementedError(f"dtype = {dtype}")
@pytest.mark.cuda
class TestGPUArray:
def test_pow_array(self):
......@@ -1434,6 +1452,33 @@ class TestGPUArray:
np.testing.assert_allclose(cumath.log10(x_cu).get(), np.log10(x_np),
rtol=rtol)
@pytest.mark.parametrize("ldtype", [np.int32, np.int64,
                                    np.float32, np.float64,
                                    np.complex64, np.complex128])
@pytest.mark.parametrize("rdtype", [np.int32, np.int64,
                                    np.float32, np.float64,
                                    np.complex64, np.complex128])
@pytest.mark.parametrize("op", [operator.add, operator.sub, operator.mul,
                                operator.truediv])
def test_binary_ops_with_unequal_dtypes(self, ldtype, rdtype, op):
    """Binary ops on gpuarrays of mixed dtypes must match numpy's
    result dtype (type promotion), shape, and values.

    See https://github.com/inducer/pycuda/issues/372
    """
    if op == operator.truediv and {ldtype, rdtype} <= {np.int32, np.int64}:
        # Fixed: the adjacent string literals previously concatenated to
        # "...merge_requests/66is merged." (missing space before "is").
        pytest.xfail("Enable after"
                     " gitlab.tiker.net/inducer/pycuda/-/merge_requests/66"
                     " is merged.")

    rng = np.random.default_rng(0)
    lop_np = get_random_array(rng, (10, 4), ldtype)
    rop_np = get_random_array(rng, (10, 4), rdtype)

    expected_result = op(lop_np, rop_np)
    result = op(gpuarray.to_gpu(lop_np), gpuarray.to_gpu(rop_np)).get()

    # dtype equality checks that the GPU kernel follows numpy promotion.
    assert result.dtype == expected_result.dtype
    assert result.shape == expected_result.shape
    # Loose rtol accommodates single-precision (float32/complex64) paths.
    np.testing.assert_allclose(expected_result, result,
                               rtol=5e-5)
if __name__ == "__main__":
# make sure that import failures get reported, instead of skipping the tests.
......