Patch summary (text before the first "diff --git" line is ignored by patch tools):

  * pyopencl/array.py: add InconsistentOpenCLQueueWarning and emit it from
    the elementwise kernel runner when the queue was taken implicitly from
    the first argument and another array argument carries a different,
    non-None queue.  The warning class is passed via the `category=`
    keyword of warnings.warn(); warnings.warn() has no `type=` keyword, so
    `type=` would raise TypeError instead of warning.
  * pyopencl/array.py: build the -1 coefficient in the two subtraction
    hunks from the result dtype instead of the operand dtype.
  * pyopencl/elementwise.py: get_axpbyz_kernel declares the scalars `a` and
    `b` with the result type (tp_z) and casts/multiplies the operands in
    that type, for both the complex and the real-only branch.
  * pytest.ini: register the `bitonic` marker.
  * test/test_array.py: rename test_substract_array to test_subtract_array
    and parametrize it over several dtype pairs.

Leading whitespace and blank context lines were restored to conventional
Python layout; if a context line does not match the target tree exactly,
apply with `git apply --ignore-whitespace`.  The blob ids on the `index`
lines are carried over unchanged from the original patch.

diff --git a/pyopencl/array.py b/pyopencl/array.py
index b06dd70679651fe14fcc1c195c154cee67e3cc3c..bf58c965c69d69820a405de7a90eb7cd1e74bc87 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -82,6 +82,10 @@ except Exception:
         return False
 
 
+class InconsistentOpenCLQueueWarning(UserWarning):
+    pass
+
+
 class VecLookupWarner:
     def __getattr__(self, name):
         from warnings import warn
@@ -144,7 +148,11 @@ def elwise_kernel_runner(kernel_getter):
 
     def kernel_runner(*args, **kwargs):
         repr_ary = args[0]
-        queue = kwargs.pop("queue", None) or repr_ary.queue
+        queue = kwargs.pop("queue", None)
+        implicit_queue = queue is None
+        if implicit_queue:
+            queue = repr_ary.queue
+
         wait_for = kwargs.pop("wait_for", None)
 
         # wait_for must be a copy, because we modify it in-place below
@@ -171,6 +179,16 @@ def elwise_kernel_runner(kernel_getter):
                 actual_args.append(arg.base_data)
                 actual_args.append(arg.offset)
                 wait_for.extend(arg.events)
+
+                if (implicit_queue
+                        and arg.queue is not None
+                        and arg.queue != queue):
+                    from warnings import warn
+
+                    warn("Implicit queue in elementwise operation does not match "
+                            "queue of a provided argument. This will become an "
+                            "error in 2021.",
+                            category=InconsistentOpenCLQueueWarning)
             else:
                 actual_args.append(arg)
         actual_args.append(repr_ary.size)
@@ -1008,7 +1026,7 @@ class Array:
             result.add_event(
                     self._axpbyz(result,
                         self.dtype.type(1), self,
-                        other.dtype.type(-1), other))
+                        result.dtype.type(-1), other))
 
             return result
         else:
@@ -1031,7 +1049,7 @@ class Array:
         # other must be a scalar
         result = self._new_like_me(common_dtype)
         result.add_event(
-                self._axpbz(result, self.dtype.type(-1), self,
+                self._axpbz(result, result.dtype.type(-1), self,
                     common_dtype.type(other)))
         return result
 
diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py
index 357aa2bbf17477713905d040376ec199a518f877..df364eda3c883d378c1e9d25136d8f59f5763f9d 100644
--- a/pyopencl/elementwise.py
+++ b/pyopencl/elementwise.py
@@ -503,36 +503,36 @@ def real_dtype(dtype):
 
 @context_dependent_memoize
 def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z):
-    ax = "a*x[i]"
-    by = "b*y[i]"
+    result_t = dtype_to_ctype(dtype_z)
 
     x_is_complex = dtype_x.kind == "c"
     y_is_complex = dtype_y.kind == "c"
 
-    if x_is_complex:
-        ax = "%s_mul(a, x[i])" % complex_dtype_to_name(dtype_x)
-
-    if y_is_complex:
-        by = "%s_mul(b, y[i])" % complex_dtype_to_name(dtype_y)
+    if dtype_z.kind == "c":
+        # a and b will always be complex here.
+        z_ct = complex_dtype_to_name(dtype_z)
 
-    if x_is_complex and not y_is_complex:
-        by = "{}_fromreal({})".format(complex_dtype_to_name(dtype_x), by)
+        if x_is_complex:
+            ax = f"{z_ct}_mul(a, {z_ct}_cast(x[i]))"
+        else:
+            ax = f"{z_ct}_mulr(a, x[i])"
 
-    if not x_is_complex and y_is_complex:
-        ax = "{}_fromreal({})".format(complex_dtype_to_name(dtype_y), ax)
+        if y_is_complex:
+            by = f"{z_ct}_mul(b, {z_ct}_cast(y[i]))"
+        else:
+            by = f"{z_ct}_mulr(b, y[i])"
 
-    if x_is_complex or y_is_complex:
-        result = (
-                "{root}_add({root}_cast({ax}), {root}_cast({by}))"
-                .format(
-                    ax=ax,
-                    by=by,
-                    root=complex_dtype_to_name(dtype_z)))
+        result = f"{z_ct}_add({ax}, {by})"
     else:
+        # real-only
+
+        ax = f"a*(({result_t}) x[i])"
+        by = f"b*(({result_t}) y[i])"
+
         result = f"{ax} + {by}"
 
     return get_elwise_kernel(context,
-            "{tp_z} *z, {tp_x} a, {tp_x} *x, {tp_y} b, {tp_y} *y".format(
+            "{tp_z} *z, {tp_z} a, {tp_x} *x, {tp_z} b, {tp_y} *y".format(
                 tp_x=dtype_to_ctype(dtype_x),
                 tp_y=dtype_to_ctype(dtype_y),
                 tp_z=dtype_to_ctype(dtype_z),
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000000000000000000000000000000000000..f2a2f6894081711b89214e24c18a5104f99db607
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers=
+    bitonic: tests involving bitonic sort
diff --git a/test/test_array.py b/test/test_array.py
index 39f8fd74e572c49e19d1614d1c352fb625f5553b..d17772375a64f9d236568bef72005637e95d181a 100644
--- a/test/test_array.py
+++ b/test/test_array.py
@@ -426,12 +426,20 @@ def test_addition_scalar(ctx_factory):
     assert (7 + a == a_added).all()
 
 
-def test_substract_array(ctx_factory):
+@pytest.mark.parametrize(("dtype_a", "dtype_b"),
+        [
+            (np.float32, np.float32),
+            (np.float32, np.int32),
+            (np.int32, np.int32),
+            (np.int64, np.int32),
+            (np.int64, np.uint32),
+        ])
+def test_subtract_array(ctx_factory, dtype_a, dtype_b):
     """Test the substraction of two arrays."""
     #test data
-    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32)
+    a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(dtype_a)
     b = np.array([10, 20, 30, 40, 50,
-                  60, 70, 80, 90, 100]).astype(np.float32)
+                  60, 70, 80, 90, 100]).astype(dtype_b)
 
     context = ctx_factory()
     queue = cl.CommandQueue(context)