diff --git a/contrib/fortran-to-opencl/translate.py b/contrib/fortran-to-opencl/translate.py index 67865aab3a400d9407856655c852d50394a40ac7..2ded2d8a328765551c2270e6c0efadd87ca80752 100644 --- a/contrib/fortran-to-opencl/translate.py +++ b/contrib/fortran-to-opencl/translate.py @@ -255,7 +255,7 @@ class ComplexCCodeMapper(CCodeMapperBase): complex_sum = self.join_rec(" + ", complexes, PREC_SUM) if real_sum: - result = "to_%s(%s) + %s" % (tgt_name, real_sum, complex_sum) + result = "%s_fromreal(%s) + %s" % (tgt_name, real_sum, complex_sum) else: result = complex_sum @@ -337,12 +337,12 @@ class ComplexCCodeMapper(CCodeMapperBase): e_complex = 'c' == self.infer_type(expr.exponent).kind if b_complex and not e_complex: - return "%s_rpower(%s, %s)" % ( + return "%s_powr(%s, %s)" % ( self.complex_type_name(tgt_dtype), self.rec(expr.base, PREC_NONE), self.rec(expr.exponent, PREC_NONE)) else: - return "%s_power(%s, %s)" % ( + return "%s_pow(%s, %s)" % ( self.complex_type_name(tgt_dtype), self.rec(expr.base, PREC_NONE), self.rec(expr.exponent, PREC_NONE)) diff --git a/doc/source/array.rst b/doc/source/array.rst index a338aecd72e6f9e05f63de0c31cf4456196218be..3d3ccbc78552602657a946dc8e301a664572988c 100644 --- a/doc/source/array.rst +++ b/doc/source/array.rst @@ -149,6 +149,18 @@ The :class:`Array` Class Return *self*, cast to *dtype*. + .. attribute :: real + + .. versionadded:: 2012.1 + + .. attribute :: imag + + .. versionadded:: 2012.1 + + .. method :: conj() + + .. versionadded:: 2012.1 + Constructing :class:`Array` Instances ---------------------------------------- @@ -368,6 +380,30 @@ functions available in the OpenCL standard. (See table 6.8 in the spec.) .. function:: tgamma(array, queue=None) .. function:: trunc(array, queue=None) +Complex Numbers +--------------- + +Since OpenCL 1.2 (and earlier) do not specify native complex number support, +PyOpenCL works around that deficiency. By saying:: + + #include + +in your kernel, you get complex types `cfloat_t` and `cdouble_t`, along with +functions defined on them such as `cfloat_mul(a, b)` or `cdouble_log(z)`. +Elementwise kernels automatically include the header if your kernel has +complex input or output. +See the `source file +`_ +for a precise list of what's available. + +Under the hood, the complex types are simply `float2` and `double2`. + +.. warning:: + Note that addition (real + complex) and multiplication (complex*complex) + are defined for e.g. `float2`, but yield wrong results, so that you need to + use the corresponding functions. + +.. versionadded:: 2012.1 Generating Arrays of Random Numbers ----------------------------------- diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 806fb25975d5819651b45597e24947c86b7d4571..6dda3f20d567abe0cf6bbd64cbf09357cc528204 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -19,6 +19,8 @@ Acknowledgments in getting OpenCL-OpenGL interoperability to work. * Sean True allowed access to a test machine to ensure compatibility with OS X Lion. +* Tomasz Rybak did many great things for PyOpenCL, not the least + of which is packaging for Debian and Ubuntu. Guidelines ========== @@ -67,7 +69,7 @@ C interface to Python: User-visible Changes ==================== -Version 2011.2.1 +Version 2012.1 ---------------- .. note:: @@ -75,6 +77,8 @@ Version 2011.2.1 This version is currently under development. You can get snapshots from PyOpenCL's git version control. +* Support for complex numbers. + Version 2011.2 -------------- diff --git a/pyopencl/__init__.py b/pyopencl/__init__.py index 9dc49c2150151a2975865deadce995e21e3a53eb..0f7d0947df7ebbf75d4cfa42a032c07cb38483d9 100644 --- a/pyopencl/__init__.py +++ b/pyopencl/__init__.py @@ -481,7 +481,7 @@ def _add_functionality(): for i, arg in enumerate(args): self.set_arg(i, arg) else: - from struct import pack + from pyopencl._pvt_struct import pack for i, (arg, arg_type_char) in enumerate( zip(args, arg_type_chars)): diff --git a/pyopencl/array.py b/pyopencl/array.py index c5a0bd0c8bf5a984604a44aac4aeb713c39ae4f3..e608d1960e4af199aa2a0d15cdb51fecfe78b4eb 100644 --- a/pyopencl/array.py +++ b/pyopencl/array.py @@ -384,9 +384,12 @@ class Array(object): @staticmethod @elwise_kernel_runner - def _axpbz(out, afac, a, other, queue=None): - """Compute ``out = afac * a + other``, where `other` is a scalar.""" - return elementwise.get_axpbz_kernel(out.context, out.dtype) + def _axpbz(out, a, x, b, queue=None): + """Compute ``z = a * x + b``, where `b` is a scalar.""" + a = np.array(a) + b = np.array(b) + return elementwise.get_axpbz_kernel(out.context, + a.dtype, x.dtype, b.dtype, out.dtype) @staticmethod @elwise_kernel_runner @@ -397,12 +400,13 @@ class Array(object): @staticmethod @elwise_kernel_runner def _rdiv_scalar(out, ary, other, queue=None): + other = np.array(other) return elementwise.get_rdivide_elwise_kernel( - out.context, ary.dtype) + out.context, ary.dtype, other.dtype, out.dtype) @staticmethod @elwise_kernel_runner - def _div(self, out, other, queue=None): + def _div(out, self, other, queue=None): """Divides an array by another array.""" assert self.shape == other.shape @@ -418,7 +422,10 @@ class Array(object): @staticmethod @elwise_kernel_runner def _abs(result, arg): - if arg.dtype.kind == "f": + if arg.dtype.kind == "c": + from pyopencl.elementwise import complex_dtype_to_name + fname = "%s_abs" % complex_dtype_to_name(arg.dtype) + elif arg.dtype.kind == "f": fname = "fabs" elif arg.dtype.kind in ["u", "i"]: fname = "abs" @@ -426,18 +433,54 @@ class Array(object): raise TypeError("unsupported dtype in _abs()") return elementwise.get_unary_func_kernel( - arg.context, fname, arg.dtype) + arg.context, fname, arg.dtype, out_dtype=result.dtype) + + @staticmethod + @elwise_kernel_runner + def _real(result, arg): + from pyopencl.elementwise import complex_dtype_to_name + fname = "%s_real" % complex_dtype_to_name(arg.dtype) + return elementwise.get_unary_func_kernel( + arg.context, fname, arg.dtype, out_dtype=result.dtype) + + @staticmethod + @elwise_kernel_runner + def _imag(result, arg): + from pyopencl.elementwise import complex_dtype_to_name + fname = "%s_imag" % complex_dtype_to_name(arg.dtype) + return elementwise.get_unary_func_kernel( + arg.context, fname, arg.dtype, out_dtype=result.dtype) + + @staticmethod + @elwise_kernel_runner + def _conj(result, arg): + from pyopencl.elementwise import complex_dtype_to_name + fname = "%s_conj" % complex_dtype_to_name(arg.dtype) + return elementwise.get_unary_func_kernel( + arg.context, fname, arg.dtype, out_dtype=result.dtype) @staticmethod @elwise_kernel_runner def _pow_scalar(result, ary, exponent): - return elementwise.get_pow_kernel(result.context, result.dtype) + exponent = np.array(exponent) + return elementwise.get_pow_kernel(result.context, + ary.dtype, exponent.dtype, result.dtype, + is_base_array=True, is_exp_array=False) + + @staticmethod + @elwise_kernel_runner + def _rpow_scalar(result, base, exponent): + base = np.array(base) + return elementwise.get_pow_kernel(result.context, + base.dtype, exponent.dtype, result.dtype, + is_base_array=False, is_exp_array=True) @staticmethod @elwise_kernel_runner def _pow_array(result, base, exponent): - return elementwise.get_pow_array_kernel( - result.context, base.dtype, exponent.dtype, result.dtype) + return elementwise.get_pow_kernel( + result.context, base.dtype, exponent.dtype, result.dtype, + is_base_array=True, is_exp_array=True) @staticmethod @elwise_kernel_runner @@ -483,15 +526,17 @@ class Array(object): if isinstance(other, Array): # add another vector result = self._new_like_me(_get_common_dtype(self, other)) - self._axpbyz(result, 1, self, 1, other) + self._axpbyz(result, + self.dtype.type(1), self, + other.dtype.type(1), other) return result else: # add a scalar if other == 0: return self else: - result = self._new_like_me() - self._axpbz(result, 1, self, other) + result = self._new_like_me(_get_common_dtype(self, other)) + self._axpbz(result, self.dtype.type(1), self, other) return result __radd__ = __add__ @@ -501,15 +546,17 @@ class Array(object): if isinstance(other, Array): result = self._new_like_me(_get_common_dtype(self, other)) - self._axpbyz(result, 1, self, -1, other) + self._axpbyz(result, + self.dtype.type(1), self, + other.dtype.type(-1), other) return result else: # subtract a scalar if other == 0: return self else: - result = self._new_like_me() - self._axpbz(result, 1, self, -other) + result = self._new_like_me(_get_common_dtype(self, other)) + self._axpbz(result, self.dtype.type(1), self, -other) return result def __rsub__(self,other): @@ -518,24 +565,26 @@ class Array(object): x = n - self """ # other must be a scalar - result = self._new_like_me() - self._axpbz(result, -1, self, other) + result = self._new_like_me(_get_common_dtype(self, other)) + self._axpbz(result, self.dtype.type(-1), self, other) return result def __iadd__(self, other): if isinstance(other, Array): - self._axpbyz(self, 1, self, 1, other) + self._axpbyz(self, + self.dtype.type(1), self, + other.dtype.type(1), other) return self else: - self._axpbz(self, 1, self, other) + self._axpbz(self, self.dtype.type(1), self, other) return self def __isub__(self, other): if isinstance(other, Array): - self._axpbyz(self, 1, self, -1, other) + self._axpbyz(self, self.dtype.type(1), self, other.dtype.type(-1), other) return self else: - self._axpbz(self, 1, self, -other) + self._axpbz(self, self.dtype.type(1), self, -other) return self def __neg__(self): @@ -549,17 +598,17 @@ class Array(object): self._elwise_multiply(result, self, other) return result else: - result = self._new_like_me() - self._axpbz(result, other, self, 0) + result = self._new_like_me(_get_common_dtype(self, other)) + self._axpbz(result, other, self, self.dtype.type(0)) return result def __rmul__(self, scalar): - result = self._new_like_me() - self._axpbz(result, scalar, self, 0) + result = self._new_like_me(_get_common_dtype(self, scalar)) + self._axpbz(result, scalar, self, self.dtype.type(0)) return result def __imul__(self, scalar): - self._axpbz(self, scalar, self, 0) + self._axpbz(self, scalar, self, self.dtype.type(0)) return self def __div__(self, other): @@ -575,8 +624,9 @@ class Array(object): return self else: # create a new array for the result - result = self._new_like_me() - self._axpbz(result, 1/other, self, 0) + result = self._new_like_me(_get_common_dtype(self, other)) + self._axpbz(result, + 1/other, self, self.dtype.type(0)) return result @@ -596,7 +646,7 @@ class Array(object): return self else: # create a new array for the result - result = self._new_like_me() + result = self._new_like_me(_get_common_dtype(self, other)) self._rdiv_scalar(result, self, other) return result @@ -620,7 +670,7 @@ class Array(object): of `self`. """ - result = self._new_like_me() + result = self._new_like_me(self.dtype.type(0).real.dtype) self._abs(result, self) return result @@ -635,11 +685,17 @@ class Array(object): result = self._new_like_me(_get_common_dtype(self, other)) self._pow_array(result, self, other) else: - result = self._new_like_me() + result = self._new_like_me(_get_common_dtype(self, other)) self._pow_scalar(result, self, other) return result + def __rpow__(self, other): + # other must be a scalar + result = self._new_like_me(_get_common_dtype(self, other)) + self._rpow_scalar(result, other, self) + return result + def reverse(self, queue=None): """Return this array in reversed order. The array is treated as one-dimensional. @@ -676,6 +732,36 @@ class Array(object): def __gt__(self, other): raise NotImplementedError + # {{{ complex-valued business + + @property + def real(self): + if self.dtype.kind == "c": + result = self._new_like_me(self.dtype.type(0).real.dtype) + self._real(result, self) + return result + else: + return self + + @property + def imag(self): + if self.dtype.kind == "c": + result = self._new_like_me(self.dtype.type(0).real.dtype) + self._imag(result, self) + return result + else: + return zeros_like(self) + + def conj(self): + if self.dtype.kind == "c": + result = self._new_like_me() + self._conj(result, self) + return result + else: + return self + + # }}} + # {{{ views def reshape(self, *shape): diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py index 46688d41e7c9309df23748d605408edb1c7a6626..63445a272a276c7b6c8d96e63ba12b055ff0840d 100644 --- a/pyopencl/clmath.py +++ b/pyopencl/clmath.py @@ -4,8 +4,14 @@ import pyopencl.elementwise as elementwise def _make_unary_array_func(name): @cl_array.elwise_kernel_runner def knl_runner(result, arg): + if arg.dtype.kind == "c": + from pyopencl.elementwise import complex_dtype_to_name + fname = "%s_%s" % (complex_dtype_to_name(arg.dtype), name) + else: + fname = name + return elementwise.get_unary_func_kernel( - result.context, name, arg.dtype) + result.context, fname, arg.dtype) def f(array, queue=None): result = array._new_like_me(queue=queue) @@ -14,7 +20,7 @@ def _make_unary_array_func(name): return f -# See table 6.8 in the CL spec +# See table 6.8 in the CL 1.1 spec acos = _make_unary_array_func("acos") acosh = _make_unary_array_func("acosh") acospi = _make_unary_array_func("acospi") diff --git a/pyopencl/compyte b/pyopencl/compyte index 1e47eb050e6f48193487ef1d6686f52be59d47f7..25865d27ab46752c93630d2b02e289fdf8080e43 160000 --- a/pyopencl/compyte +++ b/pyopencl/compyte @@ -1 +1 @@ -Subproject commit 1e47eb050e6f48193487ef1d6686f52be59d47f7 +Subproject commit 25865d27ab46752c93630d2b02e289fdf8080e43 diff --git a/pyopencl/elementwise.py b/pyopencl/elementwise.py index fd2f3bd50c786a15f46ffcc98bb6cfa1beb49fa4..f9b115b192082dfc80e66d425401be4d83ca63c4 100644 --- a/pyopencl/elementwise.py +++ b/pyopencl/elementwise.py @@ -77,12 +77,23 @@ def get_elwise_kernel_and_types(context, arguments, operation, else: parsed_args = arguments + pragmas = [] + includes = [] + have_double_pragma = False + have_complex_include = False + for arg in parsed_args: - if np.float64 == arg.dtype: - preamble = ( - "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n\n\n" - + preamble) - break + if arg.dtype in [np.float64, np.complex128]: + if not have_double_pragma: + pragmas.append("#pragma OPENCL EXTENSION cl_khr_fp64: enable\n") + have_double_pragma = True + if arg.dtype.kind == 'c': + if not have_complex_include: + includes.append("#include \n") + have_complex_include = True + + if pragmas or includes: + preamble = "\n".join(pragmas+includes) + "\n" + preamble parsed_args.append(ScalarArg(np.uintp, "n")) @@ -103,6 +114,9 @@ def get_elwise_kernel_and_types(context, arguments, operation, return kernel, parsed_args + + + def get_elwise_kernel(context, arguments, operation, name="elwise_kernel", options=[], **kwargs): """Return a L{pyopencl.Kernel} that performs the same scalar operation @@ -115,8 +129,9 @@ def get_elwise_kernel(context, arguments, operation, return func -class ElementwiseKernel: + +class ElementwiseKernel: def __init__(self, context, arguments, operation, name="elwise_kernel", options=[], **kwargs): @@ -252,12 +267,16 @@ def get_put_kernel(context, dtype, idx_dtype, vec_count=1): @context_dependent_memoize def get_copy_kernel(context, dtype_dest, dtype_src): + src = "src[i]" + if dtype_dest.kind == "c" != dtype_src.kind: + src = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_dest), src) + return get_elwise_kernel(context, "%(tp_dest)s *dest, %(tp_src)s *src" % { "tp_dest": dtype_to_ctype(dtype_dest), "tp_src": dtype_to_ctype(dtype_src), }, - "dest[i] = src[i]", + "dest[i] = %s" % src, name="copy") @@ -311,58 +330,190 @@ def get_linear_combination_kernel(summand_descriptors, return func, tex_src + + +def complex_dtype_to_name(dtype): + if dtype == np.complex128: + return "cdouble" + elif dtype == np.complex64: + return "cfloat" + else: + raise RuntimeError("invalid complex type") + + + + @context_dependent_memoize def get_axpbyz_kernel(context, dtype_x, dtype_y, dtype_z): + ax = "a*x[i]" + by = "b*y[i]" + + x_is_complex = dtype_x.kind == "c" + y_is_complex = dtype_y.kind == "c" + z_is_complex = dtype_z.kind == "c" + + if x_is_complex: + ax = "%s_mul(a, x[i])" % complex_dtype_to_name(dtype_x) + + if y_is_complex: + by = "%s_mul(b, y[i])" % complex_dtype_to_name(dtype_y) + + if x_is_complex and not y_is_complex: + by = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_x), by) + + if not x_is_complex and y_is_complex: + ax = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_y), ax) + + result = "%s + %s" % (ax, by) + if z_is_complex: + result = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), result) + return get_elwise_kernel(context, "%(tp_z)s *z, %(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y" % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = a*x[i] + b*y[i]", + "z[i] = %s" % result, name="axpbyz") @context_dependent_memoize -def get_axpbz_kernel(context, dtype): +def get_axpbz_kernel(context, dtype_a, dtype_x, dtype_b, dtype_z): + + a_is_complex = dtype_a.kind == "c" + x_is_complex = dtype_x.kind == "c" + b_is_complex = dtype_b.kind == "c" + + z_is_complex = dtype_z.kind == "c" + + ax = "a*x[i]" + if a_is_complex and x_is_complex: + a = "a" + x = "x[i]" + + if dtype_a != dtype_z: + a = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), a) + if dtype_x != dtype_z: + x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x) + + ax = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), a, x) + + b = "b" + if z_is_complex and not b_is_complex: + b = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_z), b) + + if z_is_complex and not (a_is_complex or x_is_complex): + ax = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_z), ax) + + if z_is_complex: + ax = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), ax) + b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), b) + return get_elwise_kernel(context, - "%(tp)s *z, %(tp)s a, %(tp)s *x,%(tp)s b" % { - "tp": dtype_to_ctype(dtype)}, - "z[i] = a * x[i] + b", + "%(tp_z)s *z, %(tp_a)s a, %(tp_x)s *x,%(tp_b)s b" % { + "tp_a": dtype_to_ctype(dtype_a), + "tp_x": dtype_to_ctype(dtype_x), + "tp_b": dtype_to_ctype(dtype_b), + "tp_z": dtype_to_ctype(dtype_z), + }, + "z[i] = %s + %s" % (ax, b), name="axpb") @context_dependent_memoize def get_multiply_kernel(context, dtype_x, dtype_y, dtype_z): + x_is_complex = dtype_x.kind == "c" + y_is_complex = dtype_y.kind == "c" + z_is_complex = dtype_z.kind == "c" + + x = "x[i]" + y = "y[i]" + + if x_is_complex and y_is_complex: + xy = "%s_mul(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + else: + xy = "%s * %s" % (x, y) + + if z_is_complex: + xy = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), xy) + return get_elwise_kernel(context, "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = x[i] * y[i]", + "z[i] = %s" % xy, name="multiply") @context_dependent_memoize def get_divide_kernel(context, dtype_x, dtype_y, dtype_z): + x_is_complex = dtype_x.kind == "c" + y_is_complex = dtype_y.kind == "c" + z_is_complex = dtype_z.kind == "c" + + x = "x[i]" + y = "y[i]" + + if z_is_complex and dtype_x != dtype_y: + if x_is_complex and dtype_x != dtype_z: + x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x) + if y_is_complex and dtype_y != dtype_z: + y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y) + + if x_is_complex and y_is_complex: + xoy = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + elif not x_is_complex and y_is_complex: + + xoy = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + else: + xoy = "%s / %s" % (x, y) + + if z_is_complex: + xoy = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), xoy) + return get_elwise_kernel(context, "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = x[i] / y[i]", + "z[i] = %s" % xoy, name="divide") @context_dependent_memoize -def get_rdivide_elwise_kernel(context, dtype): +def get_rdivide_elwise_kernel(context, dtype_x, dtype_y, dtype_z): + # implements y / x! + x_is_complex = dtype_x.kind == "c" + y_is_complex = dtype_y.kind == "c" + z_is_complex = dtype_z.kind == "c" + + x = "x[i]" + y = "y" + + if z_is_complex and dtype_x != dtype_y: + if x_is_complex and dtype_x != dtype_z: + x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x) + if y_is_complex and dtype_y != dtype_z: + y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y) + + if x_is_complex and y_is_complex: + yox = "%s_divide(%s, %s)" % (complex_dtype_to_name(dtype_z), y / x) + elif not y_is_complex and x_is_complex: + yox = "%s_rdivide(%s, %s)" % (complex_dtype_to_name(dtype_x), y, x) + else: + yox = "%s / %s" % (y, x) + return get_elwise_kernel(context, - "%(tp)s *z, %(tp)s *x, %(tp)s y" % { - "tp": dtype_to_ctype(dtype), + "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s y" % { + "tp_x": dtype_to_ctype(dtype_x), + "tp_y": dtype_to_ctype(dtype_y), + "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = y / x[i]", + "z[i] = %s" % yox, name="divide_r") @@ -397,24 +548,53 @@ def get_arange_kernel(context, dtype): @context_dependent_memoize -def get_pow_kernel(context, dtype): - return get_elwise_kernel(context, - "%(tp)s *z, %(tp)s *y, %(tp)s value" % { - "tp": dtype_to_ctype(dtype), - }, - "z[i] = pow(y[i], value)", - name="pow_method") +def get_pow_kernel(context, dtype_x, dtype_y, dtype_z, + is_base_array, is_exp_array): + if is_base_array: + x = "x[i]" + x_ctype = "%(tp_x)s *x" + else: + x = "x" + x_ctype = "%(tp_x)s x" + if is_exp_array: + y = "y[i]" + y_ctype = "%(tp_y)s *y" + else: + y = "y" + y_ctype = "%(tp_y)s y" + + x_is_complex = dtype_x.kind == "c" + y_is_complex = dtype_y.kind == "c" + z_is_complex = dtype_z.kind == "c" + + if z_is_complex and dtype_x != dtype_y: + if x_is_complex and dtype_x != dtype_z: + x = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), x) + if y_is_complex and dtype_y != dtype_z: + y = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), y) + elif dtype_x != dtype_y: + if dtype_x != dtype_z: + x = "(%s) (%s)" % (dtype_to_ctype(dtype_z), x) + if dtype_y != dtype_z: + y = "(%s) (%s)" % (dtype_to_ctype(dtype_z), y) + + if x_is_complex and y_is_complex: + result = "%s_pow(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + elif x_is_complex and not y_is_complex: + result = "%s_powr(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + elif not x_is_complex and y_is_complex: + result = "%s_rpow(%s, %s)" % (complex_dtype_to_name(dtype_z), x, y) + else: + result = "pow(%s, %s)" % (x, y) -@context_dependent_memoize -def get_pow_array_kernel(context, dtype_x, dtype_y, dtype_z): return get_elwise_kernel(context, - "%(tp_z)s *z, %(tp_x)s *x, %(tp_y)s *y" % { + ("%(tp_z)s *z, " + x_ctype + ", "+y_ctype) % { "tp_x": dtype_to_ctype(dtype_x), "tp_y": dtype_to_ctype(dtype_y), "tp_z": dtype_to_ctype(dtype_z), }, - "z[i] = pow(x[i], y[i])", + "z[i] = %s" % result, name="pow_method") diff --git a/pyopencl/tools.py b/pyopencl/tools.py index 4af2a8fc35947b19c492b51f3fa5805847ee372e..a49256dfadc6dac864747228a4695751056300a5 100644 --- a/pyopencl/tools.py +++ b/pyopencl/tools.py @@ -38,6 +38,9 @@ from pyopencl.compyte.dtypes import ( dtype_to_ctype) _fill_dtype_registry(respect_windows=False) +register_dtype(np.complex64, "cfloat_t") +register_dtype(np.complex128, "cdouble_t") + diff --git a/setup.py b/setup.py index 391aebfe66fb2d212b86c895ffd92640b964426b..95ac9cc8fb16fb77caa4a8eea1eb8568aeaec607 100644 --- a/setup.py +++ b/setup.py @@ -140,6 +140,12 @@ def main(): from aksetup_helper import count_down_delay count_down_delay(delay=5) + import sys + if sys.version_info >= (3,): + pvt_struct_source = "src/wrapper/_pvt_struct_v3.cpp" + else: + pvt_struct_source = "src/wrapper/_pvt_struct_v2.cpp" + setup(name="pyopencl", # metadata version=ver_dic["VERSION_TEXT"], @@ -227,6 +233,11 @@ def main(): extra_compile_args=conf["CXXFLAGS"], extra_link_args=conf["LDFLAGS"], ), + NumpyExtension("_pvt_struct", + [pvt_struct_source], + extra_compile_args=conf["CXXFLAGS"], + extra_link_args=conf["LDFLAGS"], + ), ], data_files=[ diff --git a/src/cl/pyopencl-complex.h b/src/cl/pyopencl-complex.h index 57dc9621fbe5c1d01d34536fbae3822e46f39cc4..26bfbde38af8c5e79ea6f4fb765ae14656e48a8f 100644 --- a/src/cl/pyopencl-complex.h +++ b/src/cl/pyopencl-complex.h @@ -24,18 +24,18 @@ // functions as visible below, e.g. cdouble_log(z). // // Under the hood, the complex types are simply float2 and double2. +// Note that addition (real + complex) and multiplication (complex*complex) +// are defined, but yield wrong results. -#define PYOPENCL_DECLARE_COMPLEX_TYPE_INT(REAL_TP, TPROOT, TP) \ +#define PYOPENCL_DECLARE_COMPLEX_TYPE_INT(REAL_TP, REAL_3LTR, TPROOT, TP) \ \ REAL_TP TPROOT##_real(TP a) { return a.x; } \ REAL_TP TPROOT##_imag(TP a) { return a.y; } \ REAL_TP TPROOT##_abs(TP a) { return hypot(a.x, a.y); } \ \ + TP TPROOT##_fromreal(REAL_TP a) { return (TP)(a, 0); } \ TP TPROOT##_conj(TP a) { return (TP)(a.x, -a.y); } \ \ - TP to_##TPROOT(REAL_TP a) \ - { return (TP)(a, 0); } \ - \ TP TPROOT##_mul(TP a, TP b) \ { \ return (TP)( \ @@ -81,18 +81,19 @@ } \ } \ \ - TP TPROOT##_exp(TP a) \ + TP TPROOT##_pow(TP a, TP b) \ { \ - REAL_TP expr = exp(a.x); \ - REAL_TP cosi; \ - REAL_TP sini = sincos(a.y, &cosi); \ - return (TP)(expr * cosi, expr * sini); \ + REAL_TP logr = log(hypot(a.x, a.y)); \ + REAL_TP logi = atan2(a.y, a.x); \ + REAL_TP x = exp(logr * b.x - logi * b.y); \ + REAL_TP y = logr * b.y + logi * b.x; \ + \ + REAL_TP cosy; \ + REAL_TP siny = sincos(y, &cosy); \ + return (TP) (x*cosy, x*siny); \ } \ \ - TP TPROOT##_log(TP a) \ - { return (TP)(log(hypot(a.x, a.y)), atan2(a.y, a.x)); } \ - \ - TP TPROOT##_rpower(TP a, REAL_TP b) \ + TP TPROOT##_powr(TP a, REAL_TP b) \ { \ REAL_TP logr = log(hypot(a.x, a.y)); \ REAL_TP logi = atan2(a.y, a.x); \ @@ -105,6 +106,17 @@ return (TP)(x * cosy, x*siny); \ } \ \ + TP TPROOT##_rpow(REAL_TP a, TP b) \ + { \ + REAL_TP logr = log(a); \ + REAL_TP x = exp(logr * b.x); \ + REAL_TP y = logr * b.y; \ + \ + REAL_TP cosy; \ + REAL_TP siny = sincos(y, &cosy); \ + return (TP) (x * cosy, x * siny); \ + } \ + \ TP TPROOT##_sqrt(TP a) \ { \ REAL_TP re = a.x; \ @@ -124,12 +136,88 @@ result.x = im/result.y/2.f; \ } \ return result; \ - } + } \ + \ + TP TPROOT##_exp(TP a) \ + { \ + REAL_TP expr = exp(a.x); \ + REAL_TP cosi; \ + REAL_TP sini = sincos(a.y, &cosi); \ + return (TP)(expr * cosi, expr * sini); \ + } \ + \ + TP TPROOT##_log(TP a) \ + { return (TP)(log(hypot(a.x, a.y)), atan2(a.y, a.x)); } \ + \ + TP TPROOT##_sin(TP a) \ + { \ + REAL_TP cosr; \ + REAL_TP sinr = sincos(a.x, &cosr); \ + return (TP)(sinr*cosh(a.y), cosr*sinh(a.y)); \ + } \ + \ + TP TPROOT##_cos(TP a) \ + { \ + REAL_TP cosr; \ + REAL_TP sinr = sincos(a.x, &cosr); \ + return (TP)(cosr*cosh(a.y), -sinr*sinh(a.y)); \ + } \ + \ + TP TPROOT##_tan(TP a) \ + { \ + REAL_TP re2 = 2.f * a.x; \ + REAL_TP im2 = 2.f * a.y; \ + \ + const REAL_TP limit = log(REAL_3LTR##_MAX); \ + \ + if (fabs(im2) > limit) \ + return (TP)(0.f, (im2 > 0 ? 1.f : -1.f)); \ + else \ + { \ + REAL_TP den = cos(re2) + cosh(im2); \ + return (TP) (sin(re2) / den, sinh(im2) / den); \ + } \ + } \ + \ + TP TPROOT##_sinh(TP a) \ + { \ + REAL_TP cosi; \ + REAL_TP sini = sincos(a.y, &cosi); \ + return (TP)(sinh(a.x)*cosi, cosh(a.x)*sini); \ + } \ + \ + TP TPROOT##_cosh(TP a) \ + { \ + REAL_TP cosi; \ + REAL_TP sini = sincos(a.y, &cosi); \ + return (TP)(cosh(a.x)*cosi, sinh(a.x)*sini); \ + } \ + \ + TP TPROOT##_tanh(TP a) \ + { \ + REAL_TP re2 = 2.f * a.x; \ + REAL_TP im2 = 2.f * a.y; \ + \ + const REAL_TP limit = log(REAL_3LTR##_MAX); \ + \ + if (fabs(re2) > limit) \ + return (TP)((re2 > 0 ? 1.f : -1.f), 0.f); \ + else \ + { \ + REAL_TP den = cosh(re2) + cos(im2); \ + return (TP) (sinh(re2) / den, sin(im2) / den); \ + } \ + } \ -#define PYOPENCL_DECLARE_COMPLEX_TYPE(BASE) \ +#define PYOPENCL_DECLARE_COMPLEX_TYPE(BASE, BASE_3LTR) \ typedef BASE##2 c##BASE##_t; \ \ - PYOPENCL_DECLARE_COMPLEX_TYPE_INT(BASE, c##BASE, c##BASE##_t) + PYOPENCL_DECLARE_COMPLEX_TYPE_INT(BASE, BASE_3LTR, c##BASE, c##BASE##_t) + +PYOPENCL_DECLARE_COMPLEX_TYPE(float, FLT); +#define cfloat_cast(a) ((cfloat_t) ((a).x, (a).y)) -PYOPENCL_DECLARE_COMPLEX_TYPE(float); -PYOPENCL_DECLARE_COMPLEX_TYPE(double); +#ifdef DBL_EPSILON +PYOPENCL_DECLARE_COMPLEX_TYPE(double, DBL); +#define cdouble_cast(a) ((cdouble_t) ((a).x, (a).y)) +#endif diff --git a/src/wrapper/numpy_init.hpp b/src/wrapper/numpy_init.hpp index 54dc31e7e6635c358c675e9bfdb182fee3055150..9d34ac5793daced73e208f751aa62a2c7cb5865f 100644 --- a/src/wrapper/numpy_init.hpp +++ b/src/wrapper/numpy_init.hpp @@ -5,6 +5,7 @@ #include +#include diff --git a/test/test_array.py b/test/test_array.py index 0e9a926a0b7a26dd61ff29c98036779ad9b197fc..0f6ffebd6cbeea2de75b7cf11523dacc9eebffaf 100644 --- a/test/test_array.py +++ b/test/test_array.py @@ -49,24 +49,130 @@ def test_pow_number(ctx_factory): @pytools.test.mark_test.opencl -def test_abs(ctx_factory): +def test_absrealimag(ctx_factory): context = ctx_factory() queue = cl.CommandQueue(context) - a = -cl_array.arange(queue, 111, dtype=np.float32) - res = a.get() + def real(x): return x.real + def imag(x): return x.imag + def conj(x): return x.conj() - for i in range(111): - assert res[i] <= 0 + n = 111 + for func in [abs, real, imag, conj]: + for dtype in [np.int32, np.float32, np.complex64]: + print func, dtype + a = -make_random_array(queue, dtype, n) - a = abs(a) + host_res = func(a.get()) + dev_res = func(a).get() - res = a.get() + correct = np.allclose(dev_res, host_res) + if not correct: + print dev_res + print host_res + print dev_res-host_res + assert correct - for i in range(111): - assert abs(res[i]) >= 0 - assert res[i] == i +TO_REAL = { + np.dtype(np.complex64): np.float32, + np.dtype(np.complex128): np.float64 + } + +def make_random_array(queue, dtype, size): + from pyopencl.clrandom import rand + + dtype = np.dtype(dtype) + if dtype.kind == "c": + real_dtype = TO_REAL[dtype] + return (rand(queue, shape=(size,), dtype=real_dtype).astype(dtype) + + dtype.type(1j) + * rand(queue, shape=(size,), dtype=real_dtype).astype(dtype)) + else: + return rand(queue, shape=(size,), dtype=dtype) + +@pytools.test.mark_test.opencl +def test_basic_complex(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + from pyopencl.clrandom import rand + + size = 500 + + ary = (rand(queue, shape=(size,), dtype=np.float32).astype(np.complex64) + + 1j* rand(queue, shape=(size,), dtype=np.float32).astype(np.complex64)) + c = np.complex64(5+7j) + + assert ((c*ary).get() == c*ary.get()).all() + +@pytools.test.mark_test.opencl +def test_mix_complex(ctx_factory): + context = ctx_factory() + queue = cl.CommandQueue(context) + + size = 10 + + dtypes = [ + (np.float32, np.complex64), + #(np.int32, np.complex64), + ] + + if has_double_support(context.devices[0]): + dtypes.extend([ + (np.float32, np.float64), + (np.float32, np.complex128), + (np.float64, np.complex64), + (np.float64, np.complex128), + ]) + + from operator import add, mul, sub, div + for op in [add, sub, mul, div, pow]: + for dtype_a0, dtype_b0 in dtypes: + for dtype_a, dtype_b in [ + (dtype_a0, dtype_b0), + (dtype_b0, dtype_a0), + ]: + for is_scalar_a, is_scalar_b in [ + (False, False), + (False, True), + (True, False), + ]: + if is_scalar_a: + ary_a = make_random_array(queue, dtype_a, 1).get()[0] + host_ary_a = ary_a + else: + ary_a = make_random_array(queue, dtype_a, size) + host_ary_a = ary_a.get() + + if is_scalar_b: + ary_b = make_random_array(queue, dtype_b, 1).get()[0] + host_ary_b = ary_b + else: + ary_b = make_random_array(queue, dtype_b, size) + host_ary_b = ary_b.get() + + print op, dtype_a, dtype_b, is_scalar_a, is_scalar_b + dev_result = op(ary_a, ary_b).get() + host_result = op(host_ary_a, host_ary_b) + + if host_result.dtype != dev_result.dtype: + # This appears to be a numpy bug, where we get + # served a Python complex that is really a + # smaller numpy complex. + + print "HOST_DTYPE: %s DEV_DTYPE: %s" % ( + host_result.dtype, dev_result.dtype) + + dev_result = dev_result.astype(host_result.dtype) + + correct = np.allclose(host_result, dev_result) + if not correct: + print host_result + print dev_result + print host_result - dev_result + + assert correct @pytools.test.mark_test.opencl def test_len(ctx_factory): @@ -88,13 +194,14 @@ def test_multiply(ctx_factory): for sz in [10, 50000]: for dtype, scalars in [ (np.float32, [2]), + (np.complex64, [2j]), ]: for scalar in scalars: - a = np.arange(sz).astype(dtype) - a_gpu = cl_array.to_device(queue, a) - a_doubled = (scalar * a_gpu).get() + a_gpu = make_random_array(queue, dtype, sz) + a = a_gpu.get() + a_mult = (scalar * a_gpu).get() - assert (a * scalar == a_doubled).all() + assert (a * scalar == a_mult).all() @pytools.test.mark_test.opencl diff --git a/test/test_clmath.py b/test/test_clmath.py index ab64448cf8fd43312009ecdd899bdb58598c07eb..fb8bc770e2a800bb17e5c6b8be0ca65a09e6672f 100644 --- a/test/test_clmath.py +++ b/test/test_clmath.py @@ -36,7 +36,7 @@ numpy_func_names = { -def make_unary_function_test(name, limits=(0, 1), threshold=0): +def make_unary_function_test(name, limits=(0, 1), threshold=0, use_complex=False): (a, b) = limits a = float(a) b = float(b) @@ -49,19 +49,33 @@ def make_unary_function_test(name, limits=(0, 1), threshold=0): cpu_func = getattr(np, numpy_func_names.get(name, name)) if has_double_support(context.devices[0]): - dtypes = [np.float32, np.float64] + if use_complex: + dtypes = [np.float32, np.float64, np.complex64, np.complex128] + else: + dtypes = [np.float32, np.float64] else: - dtypes = [np.float32] + if use_complex: + dtypes = [np.float32, np.complex64] + else: + dtypes = [np.float32] for s in sizes: for dtype in dtypes: - args = cl_array.arange(queue, a, b, (b-a)/s, - dtype=np.float32) + dtype = np.dtype(dtype) + + args = cl_array.arange(queue, a, b, (b-a)/s, dtype=dtype) + if dtype.kind == "c": + args = args+dtype.type(1j)*args + gpu_results = gpu_func(args).get() cpu_results = cpu_func(args.get()) + my_threshold = threshold + if dtype.kind == "c" and isinstance(use_complex, float): + my_threshold = use_complex + max_err = np.max(np.abs(cpu_results - gpu_results)) - assert (max_err <= threshold).all(), \ + assert (max_err <= my_threshold).all(), \ (max_err, name, dtype) return pytools.test.mark_test.opencl(test) @@ -73,22 +87,22 @@ if have_cl(): test_ceil = make_unary_function_test("ceil", (-10, 10)) test_floor = make_unary_function_test("ceil", (-10, 10)) test_fabs = make_unary_function_test("fabs", (-10, 10)) - test_exp = make_unary_function_test("exp", (-3, 3), 1e-5) - test_log = make_unary_function_test("log", (1e-5, 1), 1e-6) + test_exp = make_unary_function_test("exp", (-3, 3), 1e-5, use_complex=True) + test_log = make_unary_function_test("log", (1e-5, 1), 1e-6, use_complex=True) test_log10 = make_unary_function_test("log10", (1e-5, 1), 5e-7) - test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 2e-7) + test_sqrt = make_unary_function_test("sqrt", (1e-5, 1), 2e-7, use_complex=True) - test_sin = make_unary_function_test("sin", (-10, 10), 2e-7) - test_cos = make_unary_function_test("cos", (-10, 10), 2e-7) + test_sin = make_unary_function_test("sin", (-10, 10), 2e-7, use_complex=2e-3) + test_cos = make_unary_function_test("cos", (-10, 10), 2e-7, use_complex=2e-3) test_asin = make_unary_function_test("asin", (-0.9, 0.9), 5e-7) test_acos = make_unary_function_test("acos", (-0.9, 0.9), 5e-7) test_tan = make_unary_function_test("tan", - (-math.pi/2 + 0.1, math.pi/2 - 0.1), 1e-5) + (-math.pi/2 + 0.1, math.pi/2 - 0.1), 1e-5, use_complex=True) test_atan = make_unary_function_test("atan", (-10, 10), 2e-7) - test_sinh = make_unary_function_test("sinh", (-3, 3), 1e-6) - test_cosh = make_unary_function_test("cosh", (-3, 3), 1e-6) - test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6) + test_sinh = make_unary_function_test("sinh", (-3, 3), 1e-6, use_complex=2e-3) + test_cosh = make_unary_function_test("cosh", (-3, 3), 1e-6, use_complex=2e-3) + test_tanh = make_unary_function_test("tanh", (-3, 3), 2e-6, use_complex=True)