diff --git a/pyopencl/array.py b/pyopencl/array.py index d3d353d1bca6b70bc6e42127f8ab0da770ded2f4..b06dd70679651fe14fcc1c195c154cee67e3cc3c 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -50,6 +50,25 @@ def _get_common_dtype(obj1, obj2, queue): has_double_support(queue.device)) +def _get_truedivide_dtype(obj1, obj2, queue): + # the dtype of the division result obj1 / obj2 + + allow_double = has_double_support(queue.device) + + x1 = obj1 if np.isscalar(obj1) else np.ones(1, obj1.dtype) + x2 = obj2 if np.isscalar(obj2) else np.ones(1, obj2.dtype) + + result = (x1/x2).dtype + + if not allow_double: + if result == np.float64: + result = np.dtype(np.float32) + elif result == np.complex128: + result = np.dtype(np.complex64) + + return result + + # Work around PyPy not currently supporting the object dtype. # (Yes, it doesn't even support checking!) # (as of May 27, 2014 on PyPy 2.3) @@ -1080,20 +1099,20 @@ class Array: def __div__(self, other): """Divides an array by an array or a scalar, i.e. ``self / other``. """ + common_dtype = _get_truedivide_dtype(self, other, self.queue) if isinstance(other, Array): - result = self._new_like_me( - _get_common_dtype(self, other, self.queue)) + result = self._new_like_me(common_dtype) result.add_event(self._div(result, self, other)) else: if other == 1: return self.copy() else: # create a new array for the result - common_dtype = _get_common_dtype(self, other, self.queue) result = self._new_like_me(common_dtype) result.add_event( self._axpbz(result, - common_dtype.type(1/other), self, self.dtype.type(0))) + np.true_divide(common_dtype.type(1), other), + self, self.dtype.type(0))) return result @@ -1102,14 +1121,13 @@ class Array: def __rdiv__(self, other): """Divides an array by a scalar or an array, i.e. ``other / self``. """ + common_dtype = _get_truedivide_dtype(self, other, self.queue) if isinstance(other, Array): - result = self._new_like_me( - _get_common_dtype(self, other, self.queue)) + result = self._new_like_me(common_dtype) result.add_event(other._div(result, self)) else: # create a new array for the result - common_dtype = _get_common_dtype(self, other, self.queue) result = self._new_like_me(common_dtype) result.add_event( self._rdiv_scalar(result, self, common_dtype.type(other))) @@ -1118,6 +1136,26 @@ class Array: __rtruediv__ = __rdiv__ + def __itruediv__(self, other): + # raise an error if the result cannot be cast to self + common_dtype = _get_truedivide_dtype(self, other, self.queue) + if not np.can_cast(common_dtype, self.dtype.type): + raise TypeError("Cannot cast {!r} to {!r}" + .format(self.dtype, common_dtype)) + + if isinstance(other, Array): + self.add_event( + self._div(self, self, other)) + else: + if other == 1: + return self + else: + self.add_event( + self._axpbz(self, common_dtype.type(np.true_divide(1, other)), + self, self.dtype.type(0))) + + return self + def __and__(self, other): common_dtype = _get_common_dtype(self, other, self.queue) diff --git a/test/test_array.py b/test/test_array.py index 2cbef16c0d8bbd168ccb9a371f1904cb8cfc022a..39f8fd74e572c49e19d1614d1c352fb625f5553b 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -471,14 +471,29 @@ def test_divide_scalar(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) - a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).astype(np.float32) - a_gpu = cl_array.to_device(queue, a) + dtypes = (np.uint8, np.uint16, np.uint32, + np.int8, np.int16, np.int32, + np.float32, np.complex64) + from pyopencl.characterize import has_double_support + if has_double_support(queue.device): + dtypes = dtypes + (np.float64, np.complex128) + + from itertools import product - result = (a_gpu / 2).get() - assert (a / 2 == result).all() + for dtype_a, dtype_s in product(dtypes, repeat=2): + a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(dtype_a) + s = dtype_s(40) + a_gpu = cl_array.to_device(queue, a) - result = (2 / a_gpu).get() - assert (np.abs(2 / a - result) < 1e-5).all() + b = a / s + b_gpu = a_gpu / s + assert (np.abs(b_gpu.get() - b) < 1e-3).all() + assert b_gpu.dtype is b.dtype + + c = s / a + c_gpu = s / a_gpu + assert (np.abs(c_gpu.get() - c) < 1e-3).all() + assert c_gpu.dtype is c.dtype def test_divide_array(ctx_factory): @@ -487,18 +502,100 @@ def test_divide_array(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) - #test data - a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(np.float32) - b = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10]).astype(np.float32) + dtypes = (np.float32, np.complex64) + from pyopencl.characterize import has_double_support + if has_double_support(queue.device): + dtypes = dtypes + (np.float64, np.complex128) - a_gpu = cl_array.to_device(queue, a) - b_gpu = cl_array.to_device(queue, b) + from itertools import product + + for dtype_a, dtype_b in product(dtypes, repeat=2): + + a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(dtype_a) + b = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10]).astype(dtype_b) + + a_gpu = cl_array.to_device(queue, a) + b_gpu = cl_array.to_device(queue, b) + c = a / b + c_gpu = (a_gpu / b_gpu) + assert (np.abs(c_gpu.get() - c) < 1e-3).all() + assert c_gpu.dtype is c.dtype - a_divide = (a_gpu / b_gpu).get() - assert (np.abs(a / b - a_divide) < 1e-3).all() + d = b / a + d_gpu = (b_gpu / a_gpu) + assert (np.abs(d_gpu.get() - d) < 1e-3).all() + assert d_gpu.dtype is d.dtype + + +def test_divide_inplace_scalar(ctx_factory): + """Test inplace division of arrays and a scalar.""" + + context = ctx_factory() + queue = cl.CommandQueue(context) + + dtypes = (np.uint8, np.uint16, np.uint32, + np.int8, np.int16, np.int32, + np.float32, np.complex64) + from pyopencl.characterize import has_double_support + if has_double_support(queue.device): + dtypes = dtypes + (np.float64, np.complex128) + + from itertools import product + + for dtype_a, dtype_s in product(dtypes, repeat=2): + + a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(dtype_a) + s = dtype_s(40) + a_gpu = cl_array.to_device(queue, a) + + # ensure the same behavior as inplace numpy.ndarray division + try: + a /= s + except TypeError: + with np.testing.assert_raises(TypeError): + a_gpu /= s + else: + a_gpu /= s + assert (np.abs(a_gpu.get() - a) < 1e-3).all() + assert a_gpu.dtype is a.dtype + + +def test_divide_inplace_array(ctx_factory): + """Test inplace division of arrays.""" + + context = ctx_factory() + queue = cl.CommandQueue(context) + + dtypes = (np.uint8, np.uint16, np.uint32, + np.int8, np.int16, np.int32, + np.float32, np.complex64) + from pyopencl.characterize import has_double_support + if has_double_support(queue.device): + dtypes = dtypes + (np.float64, np.complex128) + + from itertools import product - a_divide = (b_gpu / a_gpu).get() - assert (np.abs(b / a - a_divide) < 1e-3).all() + for dtype_a, dtype_b in product(dtypes, repeat=2): + print(dtype_a, dtype_b) + a = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]).astype(dtype_a) + b = np.array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10]).astype(dtype_b) + + a_gpu = cl_array.to_device(queue, a) + b_gpu = cl_array.to_device(queue, b) + + # ensure the same behavior as inplace numpy.ndarray division + try: + a_gpu /= b_gpu + except TypeError: + # pass for now, as numpy casts differently for in-place and out-place + # true_divide + pass + # with np.testing.assert_raises(TypeError): + # a /= b + else: + a /= b + assert (np.abs(a_gpu.get() - a) < 1e-3).all() + assert a_gpu.dtype is a.dtype def test_bitwise(ctx_factory):